Mercurial > hg > octave-nkf > gnulib-hg
annotate lib/regex.h @ 7692:85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Mon, 27 Nov 2006 19:41:42 +0000 |
parents | b4806b1ec9e4 |
children | 7dcf8a1f2f5e |
rev | line source |
---|---|
14 | 1 /* Definitions for data structures and routines for the regular |
4020 | 2 expression library. |
6707
d0a48e08c98d
* lib/regex.h (regoff_t) [defined _REGEX_LARGE_OFFSETS]:
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
3 Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006 |
4020 | 4 Free Software Foundation, Inc. |
5 This file is part of the GNU C Library. | |
1283 | 6 |
4020 | 7 This program is free software; you can redistribute it and/or modify |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 2, or (at your option) | |
10 any later version. | |
14 | 11 |
4020 | 12 This program is distributed in the hope that it will be useful, |
1283 | 13 but WITHOUT ANY WARRANTY; without even the implied warranty of |
4020 | 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 GNU General Public License for more details. | |
14 | 16 |
4020 | 17 You should have received a copy of the GNU General Public License along |
18 with this program; if not, write to the Free Software Foundation, | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ |
14 | 20 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
21 #ifndef _REGEX_H |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
22 #define _REGEX_H 1 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
23 |
4304 | 24 #include <sys/types.h> |
25 | |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
26 /* Allow the use in C++ code. */ |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
27 #ifdef __cplusplus |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
28 extern "C" { |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
29 #endif |
14 | 30 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
31 /* Define __USE_GNU_REGEX to declare GNU extensions that violate the |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
32 POSIX name space rules. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
33 #undef __USE_GNU_REGEX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
34 #if (defined _GNU_SOURCE \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
35 || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
36 && !defined _XOPEN_SOURCE)) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
37 # define __USE_GNU_REGEX 1 |
6099 | 38 #endif |
389 | 39 |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
40 #ifdef _REGEX_LARGE_OFFSETS |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
41 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
42 /* Use types and values that are wide enough to represent signed and |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
43 unsigned byte offsets in memory. This currently works only when |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
44 the regex code is used outside of the GNU C library; it is not yet |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
45 supported within glibc itself, and glibc users should not define |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
46 _REGEX_LARGE_OFFSETS. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
47 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
48 /* The type of the offset of a byte within a string. |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
49 For historical reasons POSIX 1003.1-2004 requires that regoff_t be |
6707
d0a48e08c98d
* lib/regex.h (regoff_t) [defined _REGEX_LARGE_OFFSETS]:
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
50 at least as wide as off_t. However, many common POSIX platforms set |
d0a48e08c98d
* lib/regex.h (regoff_t) [defined _REGEX_LARGE_OFFSETS]:
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
51 regoff_t to the more-sensible ssize_t and the Open Group has |
d0a48e08c98d
* lib/regex.h (regoff_t) [defined _REGEX_LARGE_OFFSETS]:
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
52 signalled its intention to change the requirement to be that |
d0a48e08c98d
* lib/regex.h (regoff_t) [defined _REGEX_LARGE_OFFSETS]:
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
53 regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN |
d0a48e08c98d
* lib/regex.h (regoff_t) [defined _REGEX_LARGE_OFFSETS]:
Paul Eggert <eggert@cs.ucla.edu>
parents:
6184
diff
changeset
|
54 60 (2005-08-25). We don't know of any hosts where |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
55 ptrdiff_t is wider than ssize_t, so ssize_t is safe. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
56 typedef ssize_t regoff_t; |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
57 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
58 /* The type of nonnegative object indexes. Traditionally, GNU regex |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
59 uses 'int' for these. Code that uses __re_idx_t should work |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
60 regardless of whether the type is signed. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
61 typedef size_t __re_idx_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
62 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
63 /* The type of object sizes. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
64 typedef size_t __re_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
65 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
66 /* The type of object sizes, in places where the traditional code |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
67 uses unsigned long int. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
68 typedef size_t __re_long_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
69 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
70 #else |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
71 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
72 /* Use types that are binary-compatible with the traditional GNU regex |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
73 implementation, which mishandles strings longer than INT_MAX. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
74 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
75 typedef int regoff_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
76 typedef int __re_idx_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
77 typedef unsigned int __re_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
78 typedef unsigned long int __re_long_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
79 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
80 #endif |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
81 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
82 /* The following two types have to be signed and unsigned integer types |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
83 wide enough to hold a value of a pointer. For most ANSI compilers |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
84 ptrdiff_t and size_t are likely OK. Still, the size of these two |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
85 types is 2 for Microsoft C. Ugh... */ |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
86 typedef long int s_reg_t; |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
87 typedef unsigned long int active_reg_t; |
14 | 88 |
89 /* The following bits are used to determine the regexp syntax we | |
90 recognize. The set/not-set meanings are chosen so that Emacs syntax | |
91 remains the value 0. The bits are given in alphabetical order, and | |
92 the definitions shifted by one from the previous bit; thus, when we | |
93 add or remove a bit, only one other definition need change. */ | |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
94 typedef unsigned long int reg_syntax_t; |
14 | 95 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
96 #ifdef __USE_GNU_REGEX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
97 |
14 | 98 /* If this bit is not set, then \ inside a bracket expression is literal. |
99 If set, then such a \ quotes the following character. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
100 # define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) |
14 | 101 |
102 /* If this bit is not set, then + and ? are operators, and \+ and \? are | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
103 literals. |
14 | 104 If set, then \+ and \? are operators and + and ? are literals. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
105 # define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) |
14 | 106 |
107 /* If this bit is set, then character classes are supported. They are: | |
108 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | |
109 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | |
110 If not set, then character classes are not supported. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
111 # define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) |
14 | 112 |
113 /* If this bit is set, then ^ and $ are always anchors (outside bracket | |
114 expressions, of course). | |
115 If this bit is not set, then it depends: | |
116 ^ is an anchor if it is at the beginning of a regular | |
117 expression or after an open-group or an alternation operator; | |
118 $ is an anchor if it is at the end of a regular expression, or | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
119 before a close-group or an alternation operator. |
14 | 120 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
121 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because |
14 | 122 POSIX draft 11.2 says that * etc. in leading positions is undefined. |
123 We already implemented a previous draft which made those constructs | |
124 invalid, though, so we haven't changed the code back. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
125 # define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) |
14 | 126 |
127 /* If this bit is set, then special characters are always special | |
128 regardless of where they are in the pattern. | |
129 If this bit is not set, then special characters are special only in | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
130 some contexts; otherwise they are ordinary. Specifically, |
14 | 131 * + ? and intervals are only special when not after the beginning, |
132 open-group, or alternation operator. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
133 # define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) |
14 | 134 |
135 /* If this bit is set, then *, +, ?, and { cannot be first in an re or | |
136 immediately after an alternation or begin-group operator. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
137 # define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) |
14 | 138 |
139 /* If this bit is set, then . matches newline. | |
140 If not set, then it doesn't. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
141 # define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) |
14 | 142 |
143 /* If this bit is set, then . doesn't match NUL. | |
144 If not set, then it does. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
145 # define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) |
14 | 146 |
147 /* If this bit is set, nonmatching lists [^...] do not match newline. | |
148 If not set, they do. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
149 # define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) |
14 | 150 |
151 /* If this bit is set, either \{...\} or {...} defines an | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
152 interval, depending on RE_NO_BK_BRACES. |
14 | 153 If not set, \{, \}, {, and } are literals. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
154 # define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) |
14 | 155 |
156 /* If this bit is set, +, ? and | aren't recognized as operators. | |
157 If not set, they are. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
158 # define RE_LIMITED_OPS (RE_INTERVALS << 1) |
14 | 159 |
160 /* If this bit is set, newline is an alternation operator. | |
161 If not set, newline is literal. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
162 # define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) |
14 | 163 |
164 /* If this bit is set, then `{...}' defines an interval, and \{ and \} | |
165 are literals. | |
166 If not set, then `\{...\}' defines an interval. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
167 # define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) |
14 | 168 |
169 /* If this bit is set, (...) defines a group, and \( and \) are literals. | |
170 If not set, \(...\) defines a group, and ( and ) are literals. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
171 # define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) |
14 | 172 |
173 /* If this bit is set, then \<digit> matches <digit>. | |
174 If not set, then \<digit> is a back-reference. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
175 # define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) |
14 | 176 |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
177 /* If this bit is set, then | is an alternation operator, and \| is literal. |
14 | 178 If not set, then \| is an alternation operator, and | is literal. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
179 # define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) |
14 | 180 |
181 /* If this bit is set, then an ending range point collating lower | |
182 than the starting range point, as in [z-a], is invalid. | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
183 If not set, then when the ending range point collates lower than the |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
184 starting range point, the range is ignored. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
185 # define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) |
14 | 186 |
187 /* If this bit is set, then an unmatched ) is ordinary. | |
188 If not set, then an unmatched ) is invalid. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
189 # define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) |
14 | 190 |
389 | 191 /* If this bit is set, succeed as soon as we match the whole pattern, |
192 without further backtracking. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
193 # define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) |
389 | 194 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
195 /* If this bit is set, do not process the GNU regex operators. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
196 If not set, then the GNU regex operators are recognized. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
197 # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
198 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
199 /* If this bit is set, turn on internal regex debugging. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
200 If not set, and debugging was on, turn it off. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
201 This only works if regex.c is compiled -DDEBUG. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
202 We define this bit always, so that all that's needed to turn on |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
203 debugging is to recompile regex.c; the calling code can always have |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
204 this bit set, and it won't affect anything in the normal case. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
205 # define RE_DEBUG (RE_NO_GNU_OPS << 1) |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
206 |
3227 | 207 /* If this bit is set, a syntactically invalid interval is treated as |
208 a string of ordinary characters. For example, the ERE 'a{1' is | |
209 treated as 'a\{1'. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
210 # define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) |
3227 | 211 |
4020 | 212 /* If this bit is set, then ignore case when matching. |
213 If not set, then case is significant. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
214 # define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) |
4020 | 215 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
216 /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
217 for ^, because it is difficult to scan the regex backwards to find |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
218 whether ^ should be special. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
219 # define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
220 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
221 /* If this bit is set, then \{ cannot be first in a BRE or |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
222 immediately after an alternation or begin-group operator. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
223 # define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
224 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
225 /* If this bit is set, then no_sub will be set to 1 during |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
226 re_compile_pattern. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
227 # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
228 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
229 #endif /* defined __USE_GNU_REGEX */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
230 |
14 | 231 /* This global variable defines the particular regexp syntax to use (for |
232 some interfaces). When a regexp is compiled, the syntax used is | |
233 stored in the pattern buffer, so changing this does not affect | |
234 already-compiled regexps. */ | |
235 extern reg_syntax_t re_syntax_options; | |
236 | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
237 #ifdef __USE_GNU_REGEX |
14 | 238 /* Define combinations of the above bits for the standard possibilities. |
239 (The [[[ comments delimit what gets put into the Texinfo file, so | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
240 don't delete them!) */ |
14 | 241 /* [[[begin syntaxes]]] */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
242 # define RE_SYNTAX_EMACS 0 |
14 | 243 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
244 # define RE_SYNTAX_AWK \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
245 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
246 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
247 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
248 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
249 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
250 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
251 # define RE_SYNTAX_GNU_AWK \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
252 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
253 & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
254 | RE_CONTEXT_INVALID_OPS )) |
14 | 255 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
256 # define RE_SYNTAX_POSIX_AWK \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
257 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
258 | RE_INTERVALS | RE_NO_GNU_OPS) |
14 | 259 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
260 # define RE_SYNTAX_GREP \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
261 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
262 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
263 | RE_NEWLINE_ALT) |
14 | 264 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
265 # define RE_SYNTAX_EGREP \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
266 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
267 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
268 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
269 | RE_NO_BK_VBAR) |
14 | 270 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
271 # define RE_SYNTAX_POSIX_EGREP \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
272 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
273 | RE_INVALID_INTERVAL_ORD) |
14 | 274 |
389 | 275 /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
276 # define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC |
389 | 277 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
278 # define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC |
14 | 279 |
280 /* Syntax bits common to both basic and extended POSIX regex syntax. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
281 # define _RE_SYNTAX_POSIX_COMMON \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
282 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
283 | RE_INTERVALS | RE_NO_EMPTY_RANGES) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
284 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
285 # define RE_SYNTAX_POSIX_BASIC \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
286 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) |
14 | 287 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
288 /* Differs from ..._POSIX_BASIC in that RE_CONTEXT_INVALID_DUP is removed and RE_BK_PLUS_QM becomes |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
289 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
290 isn't minimal, since other operators, such as \`, aren't disabled. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
291 # define RE_SYNTAX_POSIX_MINIMAL_BASIC \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
292 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) |
14 | 293 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
294 # define RE_SYNTAX_POSIX_EXTENDED \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
295 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
296 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
297 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
298 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) |
14 | 299 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
300 /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
301 removed and RE_NO_BK_REFS is added. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
302 # define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
303 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
304 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
305 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
306 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
307 /* [[[end syntaxes]]] */ |
14 | 308 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
309 #endif /* defined __USE_GNU_REGEX */ |
14 | 310 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
311 #ifdef __USE_GNU_REGEX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
312 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
313 /* Maximum number of duplicates an interval can allow. POSIX-conforming |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
314 systems might define this in <limits.h>, but we want our |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
315 value, so remove any previous define. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
316 # ifdef RE_DUP_MAX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
317 # undef RE_DUP_MAX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
318 # endif |
6733
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
319 |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
320 /* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
321 the counter as a 2-byte signed integer. This is no longer true, so |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
322 RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
323 ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined. |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
324 However, there would be a huge performance problem if someone |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
325 actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
326 its historical value. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
327 # define RE_DUP_MAX (0x7fff) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
328 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
329 #endif /* defined __USE_GNU_REGEX */ |
14 | 330 |
331 | |
332 /* POSIX `cflags' bits (i.e., information for `regcomp'). */ | |
333 | |
334 /* If this bit is set, then use extended regular expression syntax. | |
335 If not set, then use basic regular expression syntax. */ | |
336 #define REG_EXTENDED 1 | |
337 | |
338 /* If this bit is set, then ignore case when matching. | |
339 If not set, then case is significant. */ | |
6099 | 340 #define REG_ICASE (1 << 1) |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
341 |
14 | 342 /* If this bit is set, then anchors match at newline |
343 characters in the string. | |
344 If not set, then anchors do not match at newlines. */ | |
6099 | 345 #define REG_NEWLINE (1 << 2) |
14 | 346 |
347 /* If this bit is set, then report only success or fail in regexec. | |
348 If not set, then returns differ between not matching and errors. */ | |
6099 | 349 #define REG_NOSUB (1 << 3) |
14 | 350 |
351 | |
352 /* POSIX `eflags' bits (i.e., information for regexec). */ | |
353 | |
354 /* If this bit is set, then the beginning-of-line operator doesn't match | |
355 the beginning of the string (presumably because it's not the | |
356 beginning of a line). | |
357 If not set, then the beginning-of-line operator does match the | |
358 beginning of the string. */ | |
359 #define REG_NOTBOL 1 | |
360 | |
361 /* Like REG_NOTBOL, except for the end-of-line. */ | |
362 #define REG_NOTEOL (1 << 1) | |
363 | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
364 /* Use PMATCH[0] to delimit the start and end of the search in the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
365 buffer. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
366 #define REG_STARTEND (1 << 2) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
367 |
14 | 368 |
369 /* If any error codes are removed, changed, or added, update the | |
6099 | 370 `__re_error_msgid' table in regcomp.c. */ |
371 | |
14 | 372 typedef enum |
373 { | |
6099 | 374 _REG_ENOSYS = -1, /* This will never happen for this implementation. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
375 _REG_NOERROR = 0, /* Success. */ |
6099 | 376 _REG_NOMATCH, /* Didn't find a match (for regexec). */ |
14 | 377 |
378 /* POSIX regcomp return error codes. (In the order listed in the | |
379 standard.) */ | |
6099 | 380 _REG_BADPAT, /* Invalid pattern. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
381 _REG_ECOLLATE, /* Invalid collating element. */ |
6099 | 382 _REG_ECTYPE, /* Invalid character class name. */ |
383 _REG_EESCAPE, /* Trailing backslash. */ | |
384 _REG_ESUBREG, /* Invalid back reference. */ | |
385 _REG_EBRACK, /* Unmatched left bracket. */ | |
386 _REG_EPAREN, /* Parenthesis imbalance. */ | |
387 _REG_EBRACE, /* Unmatched \{. */ | |
388 _REG_BADBR, /* Invalid contents of \{\}. */ | |
389 _REG_ERANGE, /* Invalid range end. */ | |
390 _REG_ESPACE, /* Ran out of memory. */ | |
391 _REG_BADRPT, /* No preceding re for repetition op. */ | |
14 | 392 |
393 /* Error codes we've added. */ | |
6099 | 394 _REG_EEND, /* Premature end. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
395 _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
396 _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
397 } reg_errcode_t; |
6099 | 398 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
399 #ifdef _XOPEN_SOURCE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
400 # define REG_ENOSYS _REG_ENOSYS |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
401 #endif |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
402 #define REG_NOERROR _REG_NOERROR |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
403 #define REG_NOMATCH _REG_NOMATCH |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
404 #define REG_BADPAT _REG_BADPAT |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
405 #define REG_ECOLLATE _REG_ECOLLATE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
406 #define REG_ECTYPE _REG_ECTYPE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
407 #define REG_EESCAPE _REG_EESCAPE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
408 #define REG_ESUBREG _REG_ESUBREG |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
409 #define REG_EBRACK _REG_EBRACK |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
410 #define REG_EPAREN _REG_EPAREN |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
411 #define REG_EBRACE _REG_EBRACE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
412 #define REG_BADBR _REG_BADBR |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
413 #define REG_ERANGE _REG_ERANGE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
414 #define REG_ESPACE _REG_ESPACE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
415 #define REG_BADRPT _REG_BADRPT |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
416 #define REG_EEND _REG_EEND |
6099 | 417 #define REG_ESIZE _REG_ESIZE |
418 #define REG_ERPAREN _REG_ERPAREN | |
14 | 419 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
420 /* struct re_pattern_buffer normally uses member names like `buffer' |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
421 that POSIX does not allow. In POSIX mode these members have names |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
422 with leading `re_' (e.g., `re_buffer'). */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
423 #ifdef __USE_GNU_REGEX |
6099 | 424 # define _REG_RE_NAME(id) id |
425 # define _REG_RM_NAME(id) id | |
426 #else | |
427 # define _REG_RE_NAME(id) re_##id | |
428 # define _REG_RM_NAME(id) rm_##id | |
429 #endif | |
430 | |
431 /* The user can specify the type of the re_translate member by | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
432 defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
433 char *. This pollutes the POSIX name space, so in POSIX mode just |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
434 use unsigned char *. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
435 #ifdef __USE_GNU_REGEX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
436 # ifndef RE_TRANSLATE_TYPE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
437 # define RE_TRANSLATE_TYPE unsigned char * |
6099 | 438 # endif |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
439 # define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
440 #else |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
441 # define REG_TRANSLATE_TYPE unsigned char * |
6099 | 442 #endif |
443 | |
14 | 444 /* This data structure represents a compiled pattern. Before calling |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
445 the pattern compiler, the fields `buffer', `allocated', `fastmap', |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
446 `translate', and `no_sub' can be set. After the pattern has been |
14 | 447 compiled, the `re_nsub' field is available. All other fields are |
448 private to the regex routines. */ | |
449 | |
450 struct re_pattern_buffer | |
451 { | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
452 /* Space that holds the compiled pattern. It is declared as |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
453 `unsigned char *' because its elements are sometimes used as |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
454 array indexes. */ |
6099 | 455 unsigned char *_REG_RE_NAME (buffer); |
14 | 456 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
457 /* Number of bytes to which `buffer' points. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
458 __re_long_size_t _REG_RE_NAME (allocated); |
14 | 459 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
460 /* Number of bytes actually used in `buffer'. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
461 __re_long_size_t _REG_RE_NAME (used); |
14 | 462 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
463 /* Syntax setting with which the pattern was compiled. */ |
6099 | 464 reg_syntax_t _REG_RE_NAME (syntax); |
14 | 465 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
466 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
467 fastmap, if there is one, to skip over impossible starting points |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
468 for matches. */ |
6099 | 469 char *_REG_RE_NAME (fastmap); |
14 | 470 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
471 /* Either a translate table to apply to all characters before |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
472 comparing them, or zero for no translation. The translation is |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
473 applied to a pattern when it is compiled and to a string when it |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
474 is matched. */ |
6099 | 475 REG_TRANSLATE_TYPE _REG_RE_NAME (translate); |
14 | 476 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
477 /* Number of subexpressions found by the compiler. */ |
14 | 478 size_t re_nsub; |
479 | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
480 /* Zero if this pattern cannot match the empty string, one otherwise. |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
481 Well, in truth it's used only in `re_search_2', to see whether or |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
482 not we should use the fastmap, so we don't set this absolutely |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
483 perfectly; see `re_compile_fastmap' (the `duplicate' case). */ |
6099 | 484 unsigned int _REG_RE_NAME (can_be_null) : 1; |
14 | 485 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
486 /* If REGS_UNALLOCATED, allocate space in the `regs' structure |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
487 for `max (RE_NREGS, re_nsub + 1)' groups. |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
488 If REGS_REALLOCATE, reallocate space if necessary. |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
489 If REGS_FIXED, use what's there. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
490 #ifdef __USE_GNU_REGEX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
491 # define REGS_UNALLOCATED 0 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
492 # define REGS_REALLOCATE 1 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
493 # define REGS_FIXED 2 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
494 #endif |
6099 | 495 unsigned int _REG_RE_NAME (regs_allocated) : 2; |
14 | 496 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
497 /* Set to zero when `regex_compile' compiles a pattern; set to one |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
498 by `re_compile_fastmap' if it updates the fastmap. */ |
6099 | 499 unsigned int _REG_RE_NAME (fastmap_accurate) : 1; |
14 | 500 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
501 /* If set, `re_match_2' does not return information about |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
502 subexpressions. */ |
6099 | 503 unsigned int _REG_RE_NAME (no_sub) : 1; |
14 | 504 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
505 /* If set, a beginning-of-line anchor doesn't match at the beginning |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
506 of the string. */ |
6099 | 507 unsigned int _REG_RE_NAME (not_bol) : 1; |
14 | 508 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
509 /* Similarly for an end-of-line anchor. */ |
6099 | 510 unsigned int _REG_RE_NAME (not_eol) : 1; |
14 | 511 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
512 /* If true, an anchor at a newline matches. */ |
6099 | 513 unsigned int _REG_RE_NAME (newline_anchor) : 1; |
14 | 514 |
515 /* [[[end pattern_buffer]]] */ | |
516 }; | |
517 | |
518 typedef struct re_pattern_buffer regex_t; | |
519 | |
520 /* This is the structure we store register match data in. See | |
521 regex.texinfo for a full description of what registers match. */ | |
522 struct re_registers | |
523 { | |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
524 __re_size_t _REG_RM_NAME (num_regs); |
6099 | 525 regoff_t *_REG_RM_NAME (start); |
526 regoff_t *_REG_RM_NAME (end); | |
14 | 527 }; |
528 | |
529 | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
530 /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, |
14 | 531 `re_match_2' returns information about at least this many registers |
532 the first time a `regs' structure is passed. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
533 #if !defined RE_NREGS && defined __USE_GNU_REGEX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
534 # define RE_NREGS 30 |
14 | 535 #endif |
536 | |
537 | |
538 /* POSIX specification for registers. Aside from using different names than | |
539 `re_registers', POSIX uses an array of structures, instead of a | |
540 structure of arrays. */ | |
541 typedef struct | |
542 { | |
543 regoff_t rm_so; /* Byte offset from string's start to substring's start. */ | |
544 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ | |
545 } regmatch_t; | |
546 | |
547 /* Declarations for routines. */ | |
548 | |
549 /* Set the current default syntax to SYNTAX, and return the old syntax. | |
550 You can also simply assign to the `re_syntax_options' variable. */ | |
6099 | 551 extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); |
14 | 552 |
553 /* Compile the regular expression PATTERN, with length LENGTH | |
554 and syntax given by the global `re_syntax_options', into the buffer | |
555 BUFFER. Return NULL if successful, and an error string if not. */ | |
6099 | 556 extern const char *re_compile_pattern (const char *__pattern, size_t __length, |
557 struct re_pattern_buffer *__buffer); | |
14 | 558 |
559 | |
560 /* Compile a fastmap for the compiled pattern in BUFFER; used to | |
561 accelerate searches. Return 0 if successful and -2 if there was an | |
562 internal error. */ | |
6099 | 563 extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); |
14 | 564 |
565 | |
566 /* Search in the string STRING (with length LENGTH) for the pattern | |
567 compiled into BUFFER. Start searching at position START, for RANGE | |
568 characters. Return the starting position of the match, -1 for no | |
569 match, or -2 for an internal error. Also return register | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
570 information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
571 extern regoff_t re_search (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
572 const char *__string, __re_idx_t __length, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
573 __re_idx_t __start, regoff_t __range, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
574 struct re_registers *__regs); |
14 | 575 |
576 | |
577 /* Like `re_search', but search in the concatenation of STRING1 and | |
578 STRING2. Also, stop searching at index START + STOP. */ | |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
579 extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
580 const char *__string1, __re_idx_t __length1, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
581 const char *__string2, __re_idx_t __length2, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
582 __re_idx_t __start, regoff_t __range, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
583 struct re_registers *__regs, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
584 __re_idx_t __stop); |
14 | 585 |
586 | |
587 /* Like `re_search', but return how many characters in STRING the regexp | |
588 in BUFFER matched, starting at position START. */ | |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
589 extern regoff_t re_match (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
590 const char *__string, __re_idx_t __length, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
591 __re_idx_t __start, struct re_registers *__regs); |
14 | 592 |
593 | |
594 /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ | |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
595 extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
596 const char *__string1, __re_idx_t __length1, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
597 const char *__string2, __re_idx_t __length2, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
598 __re_idx_t __start, struct re_registers *__regs, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
599 __re_idx_t __stop); |
14 | 600 |
601 | |
602 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | |
603 ENDS. Subsequent matches using BUFFER and REGS will use this memory | |
604 for recording register information. STARTS and ENDS must be | |
605 allocated with malloc, and must each be at least `NUM_REGS * sizeof | |
606 (regoff_t)' bytes long. | |
607 | |
608 If NUM_REGS == 0, then subsequent matches should allocate their own | |
609 register data. | |
610 | |
611 Unless this function is called, the first search or match using | |
612 BUFFER will allocate its own register data, without | |
613 freeing the old data. */ | |
6099 | 614 extern void re_set_registers (struct re_pattern_buffer *__buffer, |
615 struct re_registers *__regs, | |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
616 __re_size_t __num_regs, |
6099 | 617 regoff_t *__starts, regoff_t *__ends); |
14 | 618 |
1639 | 619 #if defined _REGEX_RE_COMP || defined _LIBC |
1283 | 620 # ifndef _CRAY |
14 | 621 /* 4.2 BSD compatibility. */ |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
622 extern char *re_comp (const char *); |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
623 extern int re_exec (const char *); |
1283 | 624 # endif |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
625 #endif |
14 | 626 |
2927 | 627 /* GCC 2.95 and later have "__restrict"; C99 compilers have |
628 "restrict", and "configure" may have defined "restrict". */ | |
629 #ifndef __restrict | |
630 # if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) | |
631 # if defined restrict || 199901L <= __STDC_VERSION__ | |
632 # define __restrict restrict | |
633 # else | |
634 # define __restrict | |
635 # endif | |
636 # endif | |
2940
c7143052abca
(__restrict_arr): Move definition out of #ifndef block.
Jim Meyering <jim@meyering.net>
parents:
2927
diff
changeset
|
637 #endif |
7689
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
638 /* gcc 3.1 and up support the [restrict] syntax. Don't trust |
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
639 sys/cdefs.h's definition of __restrict_arr, though, as it |
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
640 mishandles gcc -ansi -pedantic. */ |
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
641 #undef __restrict_arr |
7692
85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7689
diff
changeset
|
642 #if ((199901L <= __STDC_VERSION__ \ |
85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7689
diff
changeset
|
643 || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ |
85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7689
diff
changeset
|
644 && !__STRICT_ANSI__)) \ |
85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7689
diff
changeset
|
645 && !defined __GNUG__) |
85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7689
diff
changeset
|
646 # define __restrict_arr __restrict |
85409a7d11bc
* lib/regex.h (__restrict_arr): Redo logic of #if, for clarity.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7689
diff
changeset
|
647 #else |
7689
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
648 # define __restrict_arr |
3684
c8ed84e015e6
(__restrict_arr): Update from libc.
Jim Meyering <jim@meyering.net>
parents:
3336
diff
changeset
|
649 #endif |
2927 | 650 |
14 | 651 /* POSIX compatibility. */ |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
652 extern int regcomp (regex_t *__restrict __preg, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
653 const char *__restrict __pattern, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
654 int __cflags); |
1466 | 655 |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
656 extern int regexec (const regex_t *__restrict __preg, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
657 const char *__restrict __string, size_t __nmatch, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
658 regmatch_t __pmatch[__restrict_arr], |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
659 int __eflags); |
1466 | 660 |
6104
1e308ce32c4c
* config/srclist.txt: Add glibc bug 1240.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6100
diff
changeset
|
661 extern size_t regerror (int __errcode, const regex_t *__restrict __preg, |
1e308ce32c4c
* config/srclist.txt: Add glibc bug 1240.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6100
diff
changeset
|
662 char *__restrict __errbuf, size_t __errbuf_size); |
1466 | 663 |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
664 extern void regfree (regex_t *__preg); |
14 | 665 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
666 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
667 #ifdef __cplusplus |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
668 } |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
669 #endif /* C++ */ |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
670 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
671 #endif /* regex.h */ |