comparison regex.c @ 2841:a19cab2eaa6d

* regex.c: Indent cpp directives and remove parens after `defined'.
(PTR_TO_OFFSET, POS_AS_IN_BUFFER): Move to a better place.
(ISDIGIT, ISCNTRL, ISXDIGIT) [!emacs]: Remove duplicate definition.
(regex_compile): Use RE_FRUGAL instead of RE_ALL_GREEDY.
(re_compile_pattern): Use size_t for length.
(init_syntax_once): Move to a better place.
* regex.h: Merge changes from GNU libc. Indent cpp directives.
(RE_FRUGAL): Replaces RE_ALL_GREEDY (inverted meaning).
author Stefan Monnier <monnier@iro.umontreal.ca>
date Mon, 28 Aug 2000 00:37:22 +0000
parents a52061981f50
children 99ba697d0902
comparison
equal deleted inserted replaced
2840:89cc967bcb03 2841:a19cab2eaa6d
1 /* Extended regular expression matching and search library, version 1 /* Extended regular expression matching and search library, version
2 0.12. (Implements POSIX draft P10003.2/D11.2, except for 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc. 5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc.
6 6
7 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
26 can simply be decremented when popping the failure_point without having 26 can simply be decremented when popping the failure_point without having
27 to stack up failure_count entries. 27 to stack up failure_count entries.
28 */ 28 */
29 29
30 /* AIX requires this to be the first thing in the file. */ 30 /* AIX requires this to be the first thing in the file. */
31 #if defined (_AIX) && !defined (REGEX_MALLOC) 31 #if defined _AIX && !defined REGEX_MALLOC
32 #pragma alloca 32 #pragma alloca
33 #endif 33 #endif
34 34
35 #undef _GNU_SOURCE 35 #undef _GNU_SOURCE
36 #define _GNU_SOURCE 36 #define _GNU_SOURCE
37 37
38 #ifdef emacs
39 /* Converts the pointer to the char to BEG-based offset from the start. */
40 #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
41 #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
42 #endif
43
44 #ifdef HAVE_CONFIG_H 38 #ifdef HAVE_CONFIG_H
45 #include <config.h> 39 # include <config.h>
46 #endif 40 #endif
47 41
48 /* We need this for `regex.h', and perhaps for the Emacs include files. */ 42 /* We need this for `regex.h', and perhaps for the Emacs include files. */
49 #include <sys/types.h> 43 #include <sys/types.h>
50 44
51 /* This is for other GNU distributions with internationalized messages. */ 45 /* This is for other GNU distributions with internationalized messages. */
52 #if HAVE_LIBINTL_H || defined (_LIBC) 46 #if HAVE_LIBINTL_H || defined _LIBC
53 # include <libintl.h> 47 # include <libintl.h>
54 #else 48 #else
55 # define gettext(msgid) (msgid) 49 # define gettext(msgid) (msgid)
56 #endif 50 #endif
57 51
58 #ifndef gettext_noop 52 #ifndef gettext_noop
59 /* This define is so xgettext can find the internationalizable 53 /* This define is so xgettext can find the internationalizable
60 strings. */ 54 strings. */
61 #define gettext_noop(String) String 55 # define gettext_noop(String) String
62 #endif 56 #endif
63 57
64 /* The `emacs' switch turns on certain matching commands 58 /* The `emacs' switch turns on certain matching commands
65 that make sense only in Emacs. */ 59 that make sense only in Emacs. */
66 #ifdef emacs 60 #ifdef emacs
67 61
68 #include "lisp.h" 62 # include "lisp.h"
69 #include "buffer.h" 63 # include "buffer.h"
70 64
71 /* Make syntax table lookup grant data in gl_state. */ 65 /* Make syntax table lookup grant data in gl_state. */
72 #define SYNTAX_ENTRY_VIA_PROPERTY 66 # define SYNTAX_ENTRY_VIA_PROPERTY
73 67
74 #include "syntax.h" 68 # include "syntax.h"
75 #include "charset.h" 69 # include "charset.h"
76 #include "category.h" 70 # include "category.h"
77 71
78 #define malloc xmalloc 72 # define malloc xmalloc
79 #define realloc xrealloc 73 # define realloc xrealloc
80 #define free xfree 74 # define free xfree
81 75
82 #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 76 /* Converts the pointer to the char to BEG-based offset from the start. */
83 #define RE_STRING_CHAR(p, s) \ 77 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
78 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
79
80 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
81 # define RE_STRING_CHAR(p, s) \
84 (multibyte ? (STRING_CHAR (p, s)) : (*(p))) 82 (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
85 #define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ 83 # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
86 (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p))) 84 (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
87 85
88 /* Set C a (possibly multibyte) character before P. P points into a 86 /* Set C a (possibly multibyte) character before P. P points into a
89 string which is the virtual concatenation of STR1 (which ends at 87 string which is the virtual concatenation of STR1 (which ends at
90 END1) or STR2 (which ends at END2). */ 88 END1) or STR2 (which ends at END2). */
91 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 89 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
92 do { \ 90 do { \
93 if (multibyte) \ 91 if (multibyte) \
94 { \ 92 { \
95 re_char *dtemp = (p) == (str2) ? (end1) : (p); \ 93 re_char *dtemp = (p) == (str2) ? (end1) : (p); \
96 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ 94 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
97 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \ 95 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
98 c = STRING_CHAR (dtemp, (p) - dtemp); \ 96 c = STRING_CHAR (dtemp, (p) - dtemp); \
99 } \ 97 } \
100 else \ 98 else \
101 (c = ((p) == (str2) ? (end1) : (p))[-1]); \ 99 (c = ((p) == (str2) ? (end1) : (p))[-1]); \
102 } while (0) 100 } while (0)
103 101
105 #else /* not emacs */ 103 #else /* not emacs */
106 104
107 /* If we are not linking with Emacs proper, 105 /* If we are not linking with Emacs proper,
108 we can't use the relocating allocator 106 we can't use the relocating allocator
109 even if config.h says that we can. */ 107 even if config.h says that we can. */
110 #undef REL_ALLOC 108 # undef REL_ALLOC
111 109
112 #if defined (STDC_HEADERS) || defined (_LIBC) 110 # if defined STDC_HEADERS || defined _LIBC
113 #include <stdlib.h> 111 # include <stdlib.h>
114 #else 112 # else
115 char *malloc (); 113 char *malloc ();
116 char *realloc (); 114 char *realloc ();
117 #endif 115 # endif
118 116
119 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 117 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
120 If nothing else has been done, use the method below. */ 118 If nothing else has been done, use the method below. */
121 #ifdef INHIBIT_STRING_HEADER 119 # ifdef INHIBIT_STRING_HEADER
122 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) 120 # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
123 #if !defined (bzero) && !defined (bcopy) 121 # if !defined bzero && !defined bcopy
124 #undef INHIBIT_STRING_HEADER 122 # undef INHIBIT_STRING_HEADER
125 #endif 123 # endif
126 #endif 124 # endif
127 #endif 125 # endif
128 126
129 /* This is the normal way of making sure we have a bcopy and a bzero. 127 /* This is the normal way of making sure we have a bcopy and a bzero.
130 This is used in most programs--a few other programs avoid this 128 This is used in most programs--a few other programs avoid this
131 by defining INHIBIT_STRING_HEADER. */ 129 by defining INHIBIT_STRING_HEADER. */
132 #ifndef INHIBIT_STRING_HEADER 130 # ifndef INHIBIT_STRING_HEADER
133 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) 131 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
134 #include <string.h> 132 # include <string.h>
135 #ifndef bcmp 133 # ifndef bcmp
136 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) 134 # define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
137 #endif 135 # endif
138 #ifndef bcopy 136 # ifndef bcopy
139 #define bcopy(s, d, n) memcpy ((d), (s), (n)) 137 # define bcopy(s, d, n) memcpy ((d), (s), (n))
140 #endif 138 # endif
141 #ifndef bzero 139 # ifndef bzero
142 #define bzero(s, n) memset ((s), 0, (n)) 140 # define bzero(s, n) memset ((s), 0, (n))
143 #endif 141 # endif
144 #else 142 # else
145 #include <strings.h> 143 # include <strings.h>
146 #endif 144 # endif
147 #endif 145 # endif
148 146
149 /* Define the syntax stuff for \<, \>, etc. */ 147 /* Define the syntax stuff for \<, \>, etc. */
150 148
151 /* Sword must be nonzero for the wordchar pattern commands in re_match_2. */ 149 /* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
152 enum syntaxcode { Swhitespace = 0, Sword = 1 }; 150 enum syntaxcode { Swhitespace = 0, Sword = 1 };
153 151
154 #ifdef SWITCH_ENUM_BUG 152 # ifdef SWITCH_ENUM_BUG
155 #define SWITCH_ENUM_CAST(x) ((int)(x)) 153 # define SWITCH_ENUM_CAST(x) ((int)(x))
156 #else 154 # else
157 #define SWITCH_ENUM_CAST(x) (x) 155 # define SWITCH_ENUM_CAST(x) (x)
158 #endif 156 # endif
159 157
160 #ifdef SYNTAX_TABLE 158 # define SYNTAX(c) re_syntax_table[c]
161
162 extern char *re_syntax_table;
163
164 #else /* not SYNTAX_TABLE */
165
166 /* How many characters in the character set. */
167 #define CHAR_SET_SIZE 256
168
169 static char re_syntax_table[CHAR_SET_SIZE];
170
171 static void
172 init_syntax_once ()
173 {
174 register int c;
175 static int done = 0;
176
177 if (done)
178 return;
179
180 bzero (re_syntax_table, sizeof re_syntax_table);
181
182 for (c = 'a'; c <= 'z'; c++)
183 re_syntax_table[c] = Sword;
184
185 for (c = 'A'; c <= 'Z'; c++)
186 re_syntax_table[c] = Sword;
187
188 for (c = '0'; c <= '9'; c++)
189 re_syntax_table[c] = Sword;
190
191 re_syntax_table['_'] = Sword;
192
193 done = 1;
194 }
195
196 #endif /* not SYNTAX_TABLE */
197
198 #define SYNTAX(c) re_syntax_table[c]
199 159
200 /* Dummy macros for non-Emacs environments. */ 160 /* Dummy macros for non-Emacs environments. */
201 #define BASE_LEADING_CODE_P(c) (0) 161 # define BASE_LEADING_CODE_P(c) (0)
202 #define CHAR_CHARSET(c) 0 162 # define CHAR_CHARSET(c) 0
203 #define CHARSET_LEADING_CODE_BASE(c) 0 163 # define CHARSET_LEADING_CODE_BASE(c) 0
204 #define MAX_MULTIBYTE_LENGTH 1 164 # define MAX_MULTIBYTE_LENGTH 1
205 #define RE_MULTIBYTE_P(x) 0 165 # define RE_MULTIBYTE_P(x) 0
206 #define WORD_BOUNDARY_P(c1, c2) (0) 166 # define WORD_BOUNDARY_P(c1, c2) (0)
207 #define CHAR_HEAD_P(p) (1) 167 # define CHAR_HEAD_P(p) (1)
208 #define SINGLE_BYTE_CHAR_P(c) (1) 168 # define SINGLE_BYTE_CHAR_P(c) (1)
209 #define SAME_CHARSET_P(c1, c2) (1) 169 # define SAME_CHARSET_P(c1, c2) (1)
210 #define MULTIBYTE_FORM_LENGTH(p, s) (1) 170 # define MULTIBYTE_FORM_LENGTH(p, s) (1)
211 #define STRING_CHAR(p, s) (*(p)) 171 # define STRING_CHAR(p, s) (*(p))
212 #define RE_STRING_CHAR STRING_CHAR 172 # define RE_STRING_CHAR STRING_CHAR
213 #define CHAR_STRING(c, s) (*(s) = (c), 1) 173 # define CHAR_STRING(c, s) (*(s) = (c), 1)
214 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) 174 # define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
215 #define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH 175 # define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
216 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 176 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
217 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) 177 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
218 #define MAKE_CHAR(charset, c1, c2) (c1) 178 # define MAKE_CHAR(charset, c1, c2) (c1)
219 #endif /* not emacs */ 179 #endif /* not emacs */
220 180
221 #ifndef RE_TRANSLATE 181 #ifndef RE_TRANSLATE
222 #define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C]) 182 # define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
223 #define RE_TRANSLATE_P(TBL) (TBL) 183 # define RE_TRANSLATE_P(TBL) (TBL)
224 #endif 184 #endif
225 185
226 /* Get the interface, including the syntax bits. */ 186 /* Get the interface, including the syntax bits. */
227 #include "regex.h" 187 #include "regex.h"
228 188
230 #include <ctype.h> 190 #include <ctype.h>
231 191
232 #ifdef emacs 192 #ifdef emacs
233 193
234 /* 1 if C is an ASCII character. */ 194 /* 1 if C is an ASCII character. */
235 #define IS_REAL_ASCII(c) ((c) < 0200) 195 # define IS_REAL_ASCII(c) ((c) < 0200)
236 196
237 /* 1 if C is a unibyte character. */ 197 /* 1 if C is a unibyte character. */
238 #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) 198 # define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
239 199
240 /* The Emacs definitions should not be directly affected by locales. */ 200 /* The Emacs definitions should not be directly affected by locales. */
241 201
242 /* In Emacs, these are only used for single-byte characters. */ 202 /* In Emacs, these are only used for single-byte characters. */
243 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9') 203 # define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
244 #define ISCNTRL(c) ((c) < ' ') 204 # define ISCNTRL(c) ((c) < ' ')
245 #define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \ 205 # define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \
246 || ((c) >= 'a' && (c) <= 'f') \ 206 || ((c) >= 'a' && (c) <= 'f') \
247 || ((c) >= 'A' && (c) <= 'F')) 207 || ((c) >= 'A' && (c) <= 'F'))
248 208
249 /* This is only used for single-byte characters. */ 209 /* This is only used for single-byte characters. */
250 #define ISBLANK(c) ((c) == ' ' || (c) == '\t') 210 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
251 211
252 /* The rest must handle multibyte characters. */ 212 /* The rest must handle multibyte characters. */
253 213
254 #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ 214 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
255 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \ 215 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
256 : 1) 216 : 1)
257 217
258 #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ 218 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
259 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ 219 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
260 : 1) 220 : 1)
261 221
262 #define ISALNUM(c) (IS_REAL_ASCII (c) \ 222 # define ISALNUM(c) (IS_REAL_ASCII (c) \
263 ? (((c) >= 'a' && (c) <= 'z') \ 223 ? (((c) >= 'a' && (c) <= 'z') \
264 || ((c) >= 'A' && (c) <= 'Z') \ 224 || ((c) >= 'A' && (c) <= 'Z') \
265 || ((c) >= '0' && (c) <= '9')) \ 225 || ((c) >= '0' && (c) <= '9')) \
266 : SYNTAX (c) == Sword) 226 : SYNTAX (c) == Sword)
267 227
268 #define ISALPHA(c) (IS_REAL_ASCII (c) \ 228 # define ISALPHA(c) (IS_REAL_ASCII (c) \
269 ? (((c) >= 'a' && (c) <= 'z') \ 229 ? (((c) >= 'a' && (c) <= 'z') \
270 || ((c) >= 'A' && (c) <= 'Z')) \ 230 || ((c) >= 'A' && (c) <= 'Z')) \
271 : SYNTAX (c) == Sword) 231 : SYNTAX (c) == Sword)
272 232
273 #define ISLOWER(c) (LOWERCASEP (c)) 233 # define ISLOWER(c) (LOWERCASEP (c))
274 234
275 #define ISPUNCT(c) (IS_REAL_ASCII (c) \ 235 # define ISPUNCT(c) (IS_REAL_ASCII (c) \
276 ? ((c) > ' ' && (c) < 0177 \ 236 ? ((c) > ' ' && (c) < 0177 \
277 && !(((c) >= 'a' && (c) <= 'z') \ 237 && !(((c) >= 'a' && (c) <= 'z') \
278 || ((c) >= 'A' && (c) <= 'Z') \ 238 || ((c) >= 'A' && (c) <= 'Z') \
279 || ((c) >= '0' && (c) <= '9'))) \ 239 || ((c) >= '0' && (c) <= '9'))) \
280 : SYNTAX (c) != Sword) 240 : SYNTAX (c) != Sword)
281 241
282 #define ISSPACE(c) (SYNTAX (c) == Swhitespace) 242 # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
283 243
284 #define ISUPPER(c) (UPPERCASEP (c)) 244 # define ISUPPER(c) (UPPERCASEP (c))
285 245
286 #define ISWORD(c) (SYNTAX (c) == Sword) 246 # define ISWORD(c) (SYNTAX (c) == Sword)
287 247
288 #else /* not emacs */ 248 #else /* not emacs */
289 249
290 /* Jim Meyering writes: 250 /* Jim Meyering writes:
291 251
296 STDC_HEADERS is defined, then autoconf has verified that the ctype 256 STDC_HEADERS is defined, then autoconf has verified that the ctype
297 macros don't need to be guarded with references to isascii. ... 257 macros don't need to be guarded with references to isascii. ...
298 Defining isascii to 1 should let any compiler worth its salt 258 Defining isascii to 1 should let any compiler worth its salt
299 eliminate the && through constant folding." */ 259 eliminate the && through constant folding." */
300 260
301 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 261 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
302 #define ISASCII(c) 1 262 # define ISASCII(c) 1
303 #else 263 # else
304 #define ISASCII(c) isascii(c) 264 # define ISASCII(c) isascii(c)
305 #endif 265 # endif
306 266
307 /* 1 if C is an ASCII character. */ 267 /* 1 if C is an ASCII character. */
308 #define IS_REAL_ASCII(c) ((c) < 0200) 268 # define IS_REAL_ASCII(c) ((c) < 0200)
309 269
310 /* This distinction is not meaningful, except in Emacs. */ 270 /* This distinction is not meaningful, except in Emacs. */
311 #define ISUNIBYTE(c) 1 271 # define ISUNIBYTE(c) 1
312 272
313 #define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 273 # ifdef isblank
314 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 274 # define ISBLANK(c) (ISASCII (c) && isblank (c))
315 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 275 # else
316 276 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
317 #ifdef isblank 277 # endif
318 #define ISBLANK(c) (ISASCII (c) && isblank (c)) 278 # ifdef isgraph
319 #else 279 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
320 #define ISBLANK(c) ((c) == ' ' || (c) == '\t') 280 # else
321 #endif 281 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
322 #ifdef isgraph 282 # endif
323 #define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 283
324 #else 284 # define ISPRINT(c) (ISASCII (c) && isprint (c))
325 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 285 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
326 #endif 286 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
327 287 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
328 #define ISPRINT(c) (ISASCII (c) && isprint (c)) 288 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
329 #define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 289 # define ISLOWER(c) (ISASCII (c) && islower (c))
330 #define ISALNUM(c) (ISASCII (c) && isalnum (c)) 290 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
331 #define ISALPHA(c) (ISASCII (c) && isalpha (c)) 291 # define ISSPACE(c) (ISASCII (c) && isspace (c))
332 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 292 # define ISUPPER(c) (ISASCII (c) && isupper (c))
333 #define ISLOWER(c) (ISASCII (c) && islower (c)) 293 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
334 #define ISPUNCT(c) (ISASCII (c) && ispunct (c)) 294
335 #define ISSPACE(c) (ISASCII (c) && isspace (c)) 295 # define ISWORD(c) ISALPHA(c)
336 #define ISUPPER(c) (ISASCII (c) && isupper (c)) 296
337 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 297 # ifdef SYNTAX_TABLE
338 298
339 #define ISWORD(c) ISALPHA(c) 299 extern char *re_syntax_table;
300
301 # else /* not SYNTAX_TABLE */
302
303 /* How many characters in the character set. */
304 # define CHAR_SET_SIZE 256
305
306 static char re_syntax_table[CHAR_SET_SIZE];
307
308 static void
309 init_syntax_once ()
310 {
311 register int c;
312 static int done = 0;
313
314 if (done)
315 return;
316
317 bzero (re_syntax_table, sizeof re_syntax_table);
318
319 for (c = 'a'; c <= 'z'; c++)
320 re_syntax_table[c] = Sword;
321
322 for (c = 'A'; c <= 'Z'; c++)
323 re_syntax_table[c] = Sword;
324
325 for (c = '0'; c <= '9'; c++)
326 re_syntax_table[c] = Sword;
327
328 re_syntax_table['_'] = Sword;
329
330 done = 1;
331 }
332
333 # endif /* not SYNTAX_TABLE */
340 334
341 #endif /* not emacs */ 335 #endif /* not emacs */
342 336
343 #ifndef NULL 337 #ifndef NULL
344 #define NULL (void *)0 338 # define NULL (void *)0
345 #endif 339 #endif
346 340
347 /* We remove any previous definition of `SIGN_EXTEND_CHAR', 341 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
348 since ours (we hope) works properly with all combinations of 342 since ours (we hope) works properly with all combinations of
349 machines, compilers, `char' and `unsigned char' argument types. 343 machines, compilers, `char' and `unsigned char' argument types.
350 (Per Bothner suggested the basic approach.) */ 344 (Per Bothner suggested the basic approach.) */
351 #undef SIGN_EXTEND_CHAR 345 #undef SIGN_EXTEND_CHAR
352 #if __STDC__ 346 #if __STDC__
353 #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 347 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
354 #else /* not __STDC__ */ 348 #else /* not __STDC__ */
355 /* As in Harbison and Steele. */ 349 /* As in Harbison and Steele. */
356 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 350 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
357 #endif 351 #endif
358 352
359 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 353 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
360 use `alloca' instead of `malloc'. This is because using malloc in 354 use `alloca' instead of `malloc'. This is because using malloc in
361 re_search* or re_match* could cause memory leaks when C-g is used in 355 re_search* or re_match* could cause memory leaks when C-g is used in
366 not functions -- `alloca'-allocated space disappears at the end of the 360 not functions -- `alloca'-allocated space disappears at the end of the
367 function it is called in. */ 361 function it is called in. */
368 362
369 #ifdef REGEX_MALLOC 363 #ifdef REGEX_MALLOC
370 364
371 #define REGEX_ALLOCATE malloc 365 # define REGEX_ALLOCATE malloc
372 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 366 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
373 #define REGEX_FREE free 367 # define REGEX_FREE free
374 368
375 #else /* not REGEX_MALLOC */ 369 #else /* not REGEX_MALLOC */
376 370
377 /* Emacs already defines alloca, sometimes. */ 371 /* Emacs already defines alloca, sometimes. */
378 #ifndef alloca 372 # ifndef alloca
379 373
380 /* Make alloca work the best possible way. */ 374 /* Make alloca work the best possible way. */
381 #ifdef __GNUC__ 375 # ifdef __GNUC__
382 #define alloca __builtin_alloca 376 # define alloca __builtin_alloca
383 #else /* not __GNUC__ */ 377 # else /* not __GNUC__ */
384 #if HAVE_ALLOCA_H 378 # if HAVE_ALLOCA_H
385 #include <alloca.h> 379 # include <alloca.h>
386 #else /* not __GNUC__ or HAVE_ALLOCA_H */ 380 # endif /* HAVE_ALLOCA_H */
387 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */ 381 # endif /* not __GNUC__ */
388 #ifndef _AIX /* Already did AIX, up at the top. */ 382
389 char *alloca (); 383 # endif /* not alloca */
390 #endif /* not _AIX */ 384
391 #endif 385 # define REGEX_ALLOCATE alloca
392 #endif /* not HAVE_ALLOCA_H */
393 #endif /* not __GNUC__ */
394
395 #endif /* not alloca */
396
397 #define REGEX_ALLOCATE alloca
398 386
399 /* Assumes a `char *destination' variable. */ 387 /* Assumes a `char *destination' variable. */
400 #define REGEX_REALLOCATE(source, osize, nsize) \ 388 # define REGEX_REALLOCATE(source, osize, nsize) \
401 (destination = (char *) alloca (nsize), \ 389 (destination = (char *) alloca (nsize), \
402 bcopy (source, destination, osize), \ 390 bcopy (source, destination, osize), \
403 destination) 391 destination)
404 392
405 /* No need to do anything to free, after alloca. */ 393 /* No need to do anything to free, after alloca. */
406 #define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 394 # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
407 395
408 #endif /* not REGEX_MALLOC */ 396 #endif /* not REGEX_MALLOC */
409 397
410 /* Define how to allocate the failure stack. */ 398 /* Define how to allocate the failure stack. */
411 399
412 #if defined (REL_ALLOC) && defined (REGEX_MALLOC) 400 #if defined REL_ALLOC && defined REGEX_MALLOC
413 401
414 #define REGEX_ALLOCATE_STACK(size) \ 402 # define REGEX_ALLOCATE_STACK(size) \
415 r_alloc (&failure_stack_ptr, (size)) 403 r_alloc (&failure_stack_ptr, (size))
416 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 404 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
417 r_re_alloc (&failure_stack_ptr, (nsize)) 405 r_re_alloc (&failure_stack_ptr, (nsize))
418 #define REGEX_FREE_STACK(ptr) \ 406 # define REGEX_FREE_STACK(ptr) \
419 r_alloc_free (&failure_stack_ptr) 407 r_alloc_free (&failure_stack_ptr)
420 408
421 #else /* not using relocating allocator */ 409 #else /* not using relocating allocator */
422 410
423 #ifdef REGEX_MALLOC 411 # ifdef REGEX_MALLOC
424 412
425 #define REGEX_ALLOCATE_STACK malloc 413 # define REGEX_ALLOCATE_STACK malloc
426 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 414 # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
427 #define REGEX_FREE_STACK free 415 # define REGEX_FREE_STACK free
428 416
429 #else /* not REGEX_MALLOC */ 417 # else /* not REGEX_MALLOC */
430 418
431 #define REGEX_ALLOCATE_STACK alloca 419 # define REGEX_ALLOCATE_STACK alloca
432 420
433 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 421 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
434 REGEX_REALLOCATE (source, osize, nsize) 422 REGEX_REALLOCATE (source, osize, nsize)
435 /* No need to explicitly free anything. */ 423 /* No need to explicitly free anything. */
436 #define REGEX_FREE_STACK(arg) ((void)0) 424 # define REGEX_FREE_STACK(arg) ((void)0)
437 425
438 #endif /* not REGEX_MALLOC */ 426 # endif /* not REGEX_MALLOC */
439 #endif /* not using relocating allocator */ 427 #endif /* not using relocating allocator */
440 428
441 429
442 /* True if `size1' is non-NULL and PTR is pointing anywhere inside 430 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
443 `string1' or just past its end. This works if PTR is NULL, which is 431 `string1' or just past its end. This works if PTR is NULL, which is
496 automatically not in the set. 484 automatically not in the set.
497 485
498 If the length byte has the 0x80 bit set, then that stuff 486 If the length byte has the 0x80 bit set, then that stuff
499 is followed by a range table: 487 is followed by a range table:
500 2 bytes of flags for character sets (low 8 bits, high 8 bits) 488 2 bytes of flags for character sets (low 8 bits, high 8 bits)
501 See RANGE_TABLE_WORK_BITS below. 489 See RANGE_TABLE_WORK_BITS below.
502 2 bytes, the number of pairs that follow 490 2 bytes, the number of pairs that follow
503 pairs, each 2 multibyte characters, 491 pairs, each 2 multibyte characters,
504 each multibyte character represented as 3 bytes. */ 492 each multibyte character represented as 3 bytes. */
505 charset, 493 charset,
506 494
507 /* Same parameters as charset, but match any character that is 495 /* Same parameters as charset, but match any character that is
508 not one of those specified. */ 496 not one of those specified. */
509 charset_not, 497 charset_not,
557 a different kind of loop (the kind that shows up with non-greedy 545 a different kind of loop (the kind that shows up with non-greedy
558 operators). This operation has to be immediately preceded 546 operators). This operation has to be immediately preceded
559 by a `no_op'. */ 547 by a `no_op'. */
560 on_failure_jump_nastyloop, 548 on_failure_jump_nastyloop,
561 549
562 /* A smart `on_failure_jump' used for greedy * and + operators. 550 /* A smart `on_failure_jump' used for greedy * and + operators.
563 It analyses the loop before which it is put and if the 551 It analyses the loop before which it is put and if the
564 loop does not require backtracking, it changes itself to 552 loop does not require backtracking, it changes itself to
565 `on_failure_keep_string_jump' and short-circuits the loop, 553 `on_failure_keep_string_jump' and short-circuits the loop,
566 else it just defaults to changing itself into `on_failure_jump'. 554 else it just defaults to changing itself into `on_failure_jump'.
567 It assumes that it is pointing to just past a `jump'. */ 555 It assumes that it is pointing to just past a `jump'. */
650 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 638 int temp = SIGN_EXTEND_CHAR (*(source + 1));
651 *dest = *source & 0377; 639 *dest = *source & 0377;
652 *dest += temp << 8; 640 *dest += temp << 8;
653 } 641 }
654 642
655 #ifndef EXTRACT_MACROS /* To debug the macros. */ 643 # ifndef EXTRACT_MACROS /* To debug the macros. */
656 #undef EXTRACT_NUMBER 644 # undef EXTRACT_NUMBER
657 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 645 # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
658 #endif /* not EXTRACT_MACROS */ 646 # endif /* not EXTRACT_MACROS */
659 647
660 #endif /* DEBUG */ 648 #endif /* DEBUG */
661 649
662 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 650 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
663 SOURCE must be an lvalue. */ 651 SOURCE must be an lvalue. */
676 { 664 {
677 extract_number (destination, *source); 665 extract_number (destination, *source);
678 *source += 2; 666 *source += 2;
679 } 667 }
680 668
681 #ifndef EXTRACT_MACROS 669 # ifndef EXTRACT_MACROS
682 #undef EXTRACT_NUMBER_AND_INCR 670 # undef EXTRACT_NUMBER_AND_INCR
683 #define EXTRACT_NUMBER_AND_INCR(dest, src) \ 671 # define EXTRACT_NUMBER_AND_INCR(dest, src) \
684 extract_number_and_incr (&dest, &src) 672 extract_number_and_incr (&dest, &src)
685 #endif /* not EXTRACT_MACROS */ 673 # endif /* not EXTRACT_MACROS */
686 674
687 #endif /* DEBUG */ 675 #endif /* DEBUG */
688 676
689 /* Store a multibyte character in three contiguous bytes starting 677 /* Store a multibyte character in three contiguous bytes starting
690 DESTINATION, and increment DESTINATION to the byte after where the 678 DESTINATION, and increment DESTINATION to the byte after where the
786 the other test files, you can run the already-written tests. */ 774 the other test files, you can run the already-written tests. */
787 775
788 #ifdef DEBUG 776 #ifdef DEBUG
789 777
790 /* We use standard I/O for debugging. */ 778 /* We use standard I/O for debugging. */
791 #include <stdio.h> 779 # include <stdio.h>
792 780
793 /* It is useful to test things that ``must'' be true when debugging. */ 781 /* It is useful to test things that ``must'' be true when debugging. */
794 #include <assert.h> 782 # include <assert.h>
795 783
796 static int debug = -100000; 784 static int debug = -100000;
797 785
798 #define DEBUG_STATEMENT(e) e 786 # define DEBUG_STATEMENT(e) e
799 #define DEBUG_PRINT1(x) if (debug > 0) printf (x) 787 # define DEBUG_PRINT1(x) if (debug > 0) printf (x)
800 #define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2) 788 # define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
801 #define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3) 789 # define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
802 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4) 790 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
803 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 791 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
804 if (debug > 0) print_partial_compiled_pattern (s, e) 792 if (debug > 0) print_partial_compiled_pattern (s, e)
805 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 793 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
806 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2) 794 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
807 795
808 796
809 /* Print the fastmap in human-readable form. */ 797 /* Print the fastmap in human-readable form. */
810 798
1034 printf ("/notsyntaxspec"); 1022 printf ("/notsyntaxspec");
1035 mcnt = *p++; 1023 mcnt = *p++;
1036 printf ("/%d", mcnt); 1024 printf ("/%d", mcnt);
1037 break; 1025 break;
1038 1026
1039 #ifdef emacs 1027 # ifdef emacs
1040 case before_dot: 1028 case before_dot:
1041 printf ("/before_dot"); 1029 printf ("/before_dot");
1042 break; 1030 break;
1043 1031
1044 case at_dot: 1032 case at_dot:
1058 case notcategoryspec: 1046 case notcategoryspec:
1059 printf ("/notcategoryspec"); 1047 printf ("/notcategoryspec");
1060 mcnt = *p++; 1048 mcnt = *p++;
1061 printf ("/%d", mcnt); 1049 printf ("/%d", mcnt);
1062 break; 1050 break;
1063 #endif /* emacs */ 1051 # endif /* emacs */
1064 1052
1065 case begbuf: 1053 case begbuf:
1066 printf ("/begbuf"); 1054 printf ("/begbuf");
1067 break; 1055 break;
1068 1056
1136 } 1124 }
1137 } 1125 }
1138 1126
1139 #else /* not DEBUG */ 1127 #else /* not DEBUG */
1140 1128
1141 #undef assert 1129 # undef assert
1142 #define assert(e) 1130 # define assert(e)
1143 1131
1144 #define DEBUG_STATEMENT(e) 1132 # define DEBUG_STATEMENT(e)
1145 #define DEBUG_PRINT1(x) 1133 # define DEBUG_PRINT1(x)
1146 #define DEBUG_PRINT2(x1, x2) 1134 # define DEBUG_PRINT2(x1, x2)
1147 #define DEBUG_PRINT3(x1, x2, x3) 1135 # define DEBUG_PRINT3(x1, x2, x3)
1148 #define DEBUG_PRINT4(x1, x2, x3, x4) 1136 # define DEBUG_PRINT4(x1, x2, x3, x4)
1149 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 1137 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1150 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 1138 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1151 1139
1152 #endif /* not DEBUG */ 1140 #endif /* not DEBUG */
1153 1141
1154 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 1142 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1155 also be assigned to arbitrarily: each pattern buffer stores its own 1143 also be assigned to arbitrarily: each pattern buffer stores its own
1225 #define MATCH_MAY_ALLOCATE 1213 #define MATCH_MAY_ALLOCATE
1226 1214
1227 /* When using GNU C, we are not REALLY using the C alloca, no matter 1215 /* When using GNU C, we are not REALLY using the C alloca, no matter
1228 what config.h may say. So don't take precautions for it. */ 1216 what config.h may say. So don't take precautions for it. */
1229 #ifdef __GNUC__ 1217 #ifdef __GNUC__
1230 #undef C_ALLOCA 1218 # undef C_ALLOCA
1231 #endif 1219 #endif
1232 1220
1233 /* The match routines may not allocate if (1) they would do it with malloc 1221 /* The match routines may not allocate if (1) they would do it with malloc
1234 and (2) it's not safe for them to use malloc. 1222 and (2) it's not safe for them to use malloc.
1235 Note that if REL_ALLOC is defined, matching would not use malloc for the 1223 Note that if REL_ALLOC is defined, matching would not use malloc for the
1236 failure stack, but we would still use it for the register vectors; 1224 failure stack, but we would still use it for the register vectors;
1237 so REL_ALLOC should not affect this. */ 1225 so REL_ALLOC should not affect this. */
1238 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) 1226 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1239 #undef MATCH_MAY_ALLOCATE 1227 # undef MATCH_MAY_ALLOCATE
1240 #endif 1228 #endif
1241 1229
1242 1230
1243 /* Failure stack declarations and macros; both re_compile_fastmap and 1231 /* Failure stack declarations and macros; both re_compile_fastmap and
1244 re_match_2 use a failure stack. These have to be macros because of 1232 re_match_2 use a failure stack. These have to be macros because of
1247 1235
1248 /* Approximate number of failure points for which to initially allocate space 1236 /* Approximate number of failure points for which to initially allocate space
1249 when matching. If this number is exceeded, we allocate more 1237 when matching. If this number is exceeded, we allocate more
1250 space, so it is not a hard limit. */ 1238 space, so it is not a hard limit. */
1251 #ifndef INIT_FAILURE_ALLOC 1239 #ifndef INIT_FAILURE_ALLOC
1252 #define INIT_FAILURE_ALLOC 20 1240 # define INIT_FAILURE_ALLOC 20
1253 #endif 1241 #endif
1254 1242
1255 /* Roughly the maximum number of failure points on the stack. Would be 1243 /* Roughly the maximum number of failure points on the stack. Would be
1256 exactly that if we always used TYPICAL_FAILURE_SIZE items each time we failed. 1244 exactly that if we always used TYPICAL_FAILURE_SIZE items each time we failed.
1257 This is a variable only so users of regex can assign to it; we never 1245 This is a variable only so users of regex can assign to it; we never
1258 change it ourselves. */ 1246 change it ourselves. */
1259 #if defined (MATCH_MAY_ALLOCATE) 1247 #if defined MATCH_MAY_ALLOCATE
1260 /* Note that 4400 is enough to cause a crash on Alpha OSF/1, 1248 /* Note that 4400 is enough to cause a crash on Alpha OSF/1,
1261 whose default stack limit is 2mb. In order for a larger 1249 whose default stack limit is 2mb. In order for a larger
1262 value to work reliably, you have to try to make it accord 1250 value to work reliably, you have to try to make it accord
1263 with the process stack limit. */ 1251 with the process stack limit. */
1264 int re_max_failures = 40000; 1252 int re_max_failures = 40000;
1289 1277
1290 /* Define macros to initialize and free the failure stack. 1278 /* Define macros to initialize and free the failure stack.
1291 Do `return -2' if the alloc fails. */ 1279 Do `return -2' if the alloc fails. */
1292 1280
1293 #ifdef MATCH_MAY_ALLOCATE 1281 #ifdef MATCH_MAY_ALLOCATE
1294 #define INIT_FAIL_STACK() \ 1282 # define INIT_FAIL_STACK() \
1295 do { \ 1283 do { \
1296 fail_stack.stack = (fail_stack_elt_t *) \ 1284 fail_stack.stack = (fail_stack_elt_t *) \
1297 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ 1285 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
1298 * sizeof (fail_stack_elt_t)); \ 1286 * sizeof (fail_stack_elt_t)); \
1299 \ 1287 \
1303 fail_stack.size = INIT_FAILURE_ALLOC; \ 1291 fail_stack.size = INIT_FAILURE_ALLOC; \
1304 fail_stack.avail = 0; \ 1292 fail_stack.avail = 0; \
1305 fail_stack.frame = 0; \ 1293 fail_stack.frame = 0; \
1306 } while (0) 1294 } while (0)
1307 1295
1308 #define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 1296 # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1309 #else 1297 #else
1310 #define INIT_FAIL_STACK() \ 1298 # define INIT_FAIL_STACK() \
1311 do { \ 1299 do { \
1312 fail_stack.avail = 0; \ 1300 fail_stack.avail = 0; \
1313 fail_stack.frame = 0; \ 1301 fail_stack.frame = 0; \
1314 } while (0) 1302 } while (0)
1315 1303
1316 #define RESET_FAIL_STACK() ((void)0) 1304 # define RESET_FAIL_STACK() ((void)0)
1317 #endif 1305 #endif
1318 1306
1319 1307
1320 /* Double the size of FAIL_STACK, up to a limit 1308 /* Double the size of FAIL_STACK, up to a limit
1321 which allows approximately `re_max_failures' items. 1309 which allows approximately `re_max_failures' items.
1615 /* If `translate' is non-null, return translate[D], else just D. We 1603 /* If `translate' is non-null, return translate[D], else just D. We
1616 cast the subscript to translate because some data is declared as 1604 cast the subscript to translate because some data is declared as
1617 `char *', to avoid warnings when a string constant is passed. But 1605 `char *', to avoid warnings when a string constant is passed. But
1618 when we use a character as a subscript we must make it unsigned. */ 1606 when we use a character as a subscript we must make it unsigned. */
1619 #ifndef TRANSLATE 1607 #ifndef TRANSLATE
1620 #define TRANSLATE(d) \ 1608 # define TRANSLATE(d) \
1621 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) 1609 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1622 #endif 1610 #endif
1623 1611
1624 1612
1625 /* Macros for outputting the compiled pattern into `buffer'. */ 1613 /* Macros for outputting the compiled pattern into `buffer'. */
1856 || STREQ (string, "word") \ 1844 || STREQ (string, "word") \
1857 || STREQ (string, "ascii") || STREQ (string, "nonascii") \ 1845 || STREQ (string, "ascii") || STREQ (string, "nonascii") \
1858 || STREQ (string, "unibyte") || STREQ (string, "multibyte")) 1846 || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
1859 1847
1860 /* QUIT is only used on NTemacs. */ 1848 /* QUIT is only used on NTemacs. */
1861 #if !defined (WINDOWSNT) || !defined (emacs) 1849 #if !defined WINDOWSNT || !defined emacs
1862 #undef QUIT 1850 # undef QUIT
1863 #define QUIT 1851 # define QUIT
1864 #endif 1852 #endif
1865 1853
1866 #ifndef MATCH_MAY_ALLOCATE 1854 #ifndef MATCH_MAY_ALLOCATE
1867 1855
1868 /* If we cannot allocate large objects within re_match_2_internal, 1856 /* If we cannot allocate large objects within re_match_2_internal,
2043 bufp->used = 0; 2031 bufp->used = 0;
2044 2032
2045 /* Always count groups, whether or not bufp->no_sub is set. */ 2033 /* Always count groups, whether or not bufp->no_sub is set. */
2046 bufp->re_nsub = 0; 2034 bufp->re_nsub = 0;
2047 2035
2048 #if !defined (emacs) && !defined (SYNTAX_TABLE) 2036 #if !defined emacs && !defined SYNTAX_TABLE
2049 /* Initialize the syntax table. */ 2037 /* Initialize the syntax table. */
2050 init_syntax_once (); 2038 init_syntax_once ();
2051 #endif 2039 #endif
2052 2040
2053 if (bufp->allocated == 0) 2041 if (bufp->allocated == 0)
2132 interval operators with these because of, e.g., `a{2}*', 2120 interval operators with these because of, e.g., `a{2}*',
2133 which should only match an even number of `a's. */ 2121 which should only match an even number of `a's. */
2134 2122
2135 for (;;) 2123 for (;;)
2136 { 2124 {
2137 if (!(syntax & RE_ALL_GREEDY) 2125 if ((syntax & RE_FRUGAL)
2138 && c == '?' && (zero_times_ok || many_times_ok)) 2126 && c == '?' && (zero_times_ok || many_times_ok))
2139 greedy = 0; 2127 greedy = 0;
2140 else 2128 else
2141 { 2129 {
2142 zero_times_ok |= c != '+'; 2130 zero_times_ok |= c != '+';
3320 #endif 3308 #endif
3321 #ifndef REGEX_MALLOC 3309 #ifndef REGEX_MALLOC
3322 char *destination; 3310 char *destination;
3323 #endif 3311 #endif
3324 3312
3325 #if defined (REL_ALLOC) && defined (REGEX_MALLOC) 3313 #if defined REL_ALLOC && defined REGEX_MALLOC
3326 /* This holds the pointer to the failure stack, when 3314 /* This holds the pointer to the failure stack, when
3327 it is allocated relocatably. */ 3315 it is allocated relocatably. */
3328 fail_stack_elt_t *failure_stack_ptr; 3316 fail_stack_elt_t *failure_stack_ptr;
3329 #endif 3317 #endif
3330 3318
3467 /* Set fastmap[I] 1 where I is a base leading code of each 3455 /* Set fastmap[I] 1 where I is a base leading code of each
3468 multibyte character in the range table. */ 3456 multibyte character in the range table. */
3469 int c, count; 3457 int c, count;
3470 3458
3471 /* Make P point to the range table. `+ 2' is to skip flag 3459 /* Make P point to the range table. `+ 2' is to skip flag
3472 bits for a character class. */ 3460 bits for a character class. */
3473 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2; 3461 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3474 3462
3475 /* Extract the number of ranges in range table into COUNT. */ 3463 /* Extract the number of ranges in range table into COUNT. */
3476 EXTRACT_NUMBER_AND_INCR (count, p); 3464 EXTRACT_NUMBER_AND_INCR (count, p);
3477 for (; count > 0; count--, p += 2 * 3) /* XXX */ 3465 for (; count > 0; count--, p += 2 * 3) /* XXX */
3898 return -1; 3886 return -1;
3899 3887
3900 val = re_match_2_internal (bufp, string1, size1, string2, size2, 3888 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3901 startpos, regs, stop); 3889 startpos, regs, stop);
3902 #ifndef REGEX_MALLOC 3890 #ifndef REGEX_MALLOC
3903 #ifdef C_ALLOCA 3891 # ifdef C_ALLOCA
3904 alloca (0); 3892 alloca (0);
3905 #endif 3893 # endif
3906 #endif 3894 #endif
3907 3895
3908 if (val >= 0) 3896 if (val >= 0)
3909 return startpos; 3897 return startpos;
3910 3898
4041 || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) 4029 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4042 #endif 4030 #endif
4043 4031
4044 /* Free everything we malloc. */ 4032 /* Free everything we malloc. */
4045 #ifdef MATCH_MAY_ALLOCATE 4033 #ifdef MATCH_MAY_ALLOCATE
4046 #define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else 4034 # define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4047 #define FREE_VARIABLES() \ 4035 # define FREE_VARIABLES() \
4048 do { \ 4036 do { \
4049 REGEX_FREE_STACK (fail_stack.stack); \ 4037 REGEX_FREE_STACK (fail_stack.stack); \
4050 FREE_VAR (regstart); \ 4038 FREE_VAR (regstart); \
4051 FREE_VAR (regend); \ 4039 FREE_VAR (regend); \
4052 FREE_VAR (best_regstart); \ 4040 FREE_VAR (best_regstart); \
4053 FREE_VAR (best_regend); \ 4041 FREE_VAR (best_regend); \
4054 } while (0) 4042 } while (0)
4055 #else 4043 #else
4056 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 4044 # define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4057 #endif /* not MATCH_MAY_ALLOCATE */ 4045 #endif /* not MATCH_MAY_ALLOCATE */
4058 4046
4059 4047
4060 /* Optimization routines. */ 4048 /* Optimization routines. */
4061 4049
4328 int size, pos; 4316 int size, pos;
4329 struct re_registers *regs; 4317 struct re_registers *regs;
4330 { 4318 {
4331 int result = re_match_2_internal (bufp, NULL, 0, string, size, 4319 int result = re_match_2_internal (bufp, NULL, 0, string, size,
4332 pos, regs, size); 4320 pos, regs, size);
4333 #if defined (C_ALLOCA) && !defined (REGEX_MALLOC) 4321 # if defined C_ALLOCA && !defined REGEX_MALLOC
4334 alloca (0); 4322 alloca (0);
4335 #endif 4323 # endif
4336 return result; 4324 return result;
4337 } 4325 }
4338 #endif /* not emacs */ 4326 #endif /* not emacs */
4339 4327
4340 #ifdef emacs 4328 #ifdef emacs
4374 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 4362 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4375 #endif 4363 #endif
4376 4364
4377 result = re_match_2_internal (bufp, string1, size1, string2, size2, 4365 result = re_match_2_internal (bufp, string1, size1, string2, size2,
4378 pos, regs, stop); 4366 pos, regs, stop);
4379 #if defined (C_ALLOCA) && !defined (REGEX_MALLOC) 4367 #if defined C_ALLOCA && !defined REGEX_MALLOC
4380 alloca (0); 4368 alloca (0);
4381 #endif 4369 #endif
4382 return result; 4370 return result;
4383 } 4371 }
4384 4372
4437 #ifdef DEBUG 4425 #ifdef DEBUG
4438 static unsigned failure_id = 0; 4426 static unsigned failure_id = 0;
4439 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4427 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4440 #endif 4428 #endif
4441 4429
4442 #if defined (REL_ALLOC) && defined (REGEX_MALLOC) 4430 #if defined REL_ALLOC && defined REGEX_MALLOC
4443 /* This holds the pointer to the failure stack, when 4431 /* This holds the pointer to the failure stack, when
4444 it is allocated relocatably. */ 4432 it is allocated relocatably. */
4445 fail_stack_elt_t *failure_stack_ptr; 4433 fail_stack_elt_t *failure_stack_ptr;
4446 #endif 4434 #endif
4447 4435
4956 DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p); 4944 DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
4957 4945
4958 assert (!REG_UNSET (regstart[*p])); 4946 assert (!REG_UNSET (regstart[*p]));
4959 /* Strictly speaking, there should be code such as: 4947 /* Strictly speaking, there should be code such as:
4960 4948
4961 assert (REG_UNSET (regend[*p])); 4949 assert (REG_UNSET (regend[*p]));
4962 PUSH_FAILURE_REGSTOP ((unsigned int)*p); 4950 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
4963 4951
4964 But the only info to be pushed is regend[*p] and it is known to 4952 But the only info to be pushed is regend[*p] and it is known to
4965 be UNSET, so there really isn't anything to push. 4953 be UNSET, so there really isn't anything to push.
4966 Not pushing anything, on the other hand, deprives us of the 4954 Not pushing anything, on the other hand, deprives us of the
5473 } 5461 }
5474 break; 5462 break;
5475 5463
5476 #endif /* emacs */ 5464 #endif /* emacs */
5477 5465
5478 default: 5466 default:
5479 abort (); 5467 abort ();
5480 } 5468 }
5481 continue; /* Successfully executed one pattern command; keep going. */ 5469 continue; /* Successfully executed one pattern command; keep going. */
5482 5470
5483 5471
5484 /* We goto here if a matching operation fails. */ 5472 /* We goto here if a matching operation fails. */
5487 if (!FAIL_STACK_EMPTY ()) 5475 if (!FAIL_STACK_EMPTY ())
5488 { 5476 {
5489 re_char *str; 5477 re_char *str;
5490 unsigned char *pat; 5478 unsigned char *pat;
5491 /* A restart point is known. Restore to that state. */ 5479 /* A restart point is known. Restore to that state. */
5492 DEBUG_PRINT1 ("\nFAIL:\n"); 5480 DEBUG_PRINT1 ("\nFAIL:\n");
5493 POP_FAILURE_POINT (str, pat); 5481 POP_FAILURE_POINT (str, pat);
5494 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++)) 5482 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
5495 { 5483 {
5496 case on_failure_keep_string_jump: 5484 case on_failure_keep_string_jump:
5497 assert (str == NULL); 5485 assert (str == NULL);
5498 goto continue_failure_jump; 5486 goto continue_failure_jump;
5519 abort(); 5507 abort();
5520 } 5508 }
5521 5509
5522 assert (p >= bufp->buffer && p <= pend); 5510 assert (p >= bufp->buffer && p <= pend);
5523 5511
5524 if (d >= string1 && d <= end1) 5512 if (d >= string1 && d <= end1)
5525 dend = end_match_1; 5513 dend = end_match_1;
5526 } 5514 }
5527 else 5515 else
5528 break; /* Matching at this starting point really fails. */ 5516 break; /* Matching at this starting point really fails. */
5529 } /* for (;;) */ 5517 } /* for (;;) */
5530 5518
5531 if (best_regs_set) 5519 if (best_regs_set)
5532 goto restore_best_regs; 5520 goto restore_best_regs;
5533 5521
5585 We call regex_compile to do the actual compilation. */ 5573 We call regex_compile to do the actual compilation. */
5586 5574
5587 const char * 5575 const char *
5588 re_compile_pattern (pattern, length, bufp) 5576 re_compile_pattern (pattern, length, bufp)
5589 const char *pattern; 5577 const char *pattern;
5590 int length; 5578 size_t length;
5591 struct re_pattern_buffer *bufp; 5579 struct re_pattern_buffer *bufp;
5592 { 5580 {
5593 reg_errcode_t ret; 5581 reg_errcode_t ret;
5594 5582
5595 /* GNU code is written to assume at least RE_NREGS registers will be set 5583 /* GNU code is written to assume at least RE_NREGS registers will be set
5612 } 5600 }
5613 5601
5614 /* Entry points compatible with 4.2 BSD regex library. We don't define 5602 /* Entry points compatible with 4.2 BSD regex library. We don't define
5615 them unless specifically requested. */ 5603 them unless specifically requested. */
5616 5604
5617 #if defined (_REGEX_RE_COMP) || defined (_LIBC) 5605 #if defined _REGEX_RE_COMP || defined _LIBC
5618 5606
5619 /* BSD has one and only one pattern buffer. */ 5607 /* BSD has one and only one pattern buffer. */
5620 static struct re_pattern_buffer re_comp_buf; 5608 static struct re_pattern_buffer re_comp_buf;
5621 5609
5622 char * 5610 char *
5623 #ifdef _LIBC 5611 # ifdef _LIBC
5624 /* Make these definitions weak in libc, so POSIX programs can redefine 5612 /* Make these definitions weak in libc, so POSIX programs can redefine
5625 these names if they don't use our functions, and still use 5613 these names if they don't use our functions, and still use
5626 regcomp/regexec below without link errors. */ 5614 regcomp/regexec below without link errors. */
5627 weak_function 5615 weak_function
5628 #endif 5616 # endif
5629 re_comp (s) 5617 re_comp (s)
5630 const char *s; 5618 const char *s;
5631 { 5619 {
5632 reg_errcode_t ret; 5620 reg_errcode_t ret;
5633 5621
5634 if (!s) 5622 if (!s)
5635 { 5623 {
5636 if (!re_comp_buf.buffer) 5624 if (!re_comp_buf.buffer)
5637 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 5625 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5638 return (char *) gettext ("No previous regular expression"); 5626 return (char *) gettext ("No previous regular expression");
5639 return 0; 5627 return 0;
5640 } 5628 }
5641 5629
5642 if (!re_comp_buf.buffer) 5630 if (!re_comp_buf.buffer)
5643 { 5631 {
5644 re_comp_buf.buffer = (unsigned char *) malloc (200); 5632 re_comp_buf.buffer = (unsigned char *) malloc (200);
5645 if (re_comp_buf.buffer == NULL) 5633 if (re_comp_buf.buffer == NULL)
5646 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 5634 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5647 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 5635 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5648 re_comp_buf.allocated = 200; 5636 re_comp_buf.allocated = 200;
5649 5637
5650 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 5638 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5651 if (re_comp_buf.fastmap == NULL) 5639 if (re_comp_buf.fastmap == NULL)
5652 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 5640 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5668 return (char *) gettext (re_error_msgid[(int) ret]); 5656 return (char *) gettext (re_error_msgid[(int) ret]);
5669 } 5657 }
5670 5658
5671 5659
5672 int 5660 int
5673 #ifdef _LIBC 5661 # ifdef _LIBC
5674 weak_function 5662 weak_function
5675 #endif 5663 # endif
5676 re_exec (s) 5664 re_exec (s)
5677 const char *s; 5665 const char *s;
5678 { 5666 {
5679 const int len = strlen (s); 5667 const int len = strlen (s);
5680 return 5668 return
5748 5736
5749 preg->translate 5737 preg->translate
5750 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 5738 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5751 * sizeof (*(RE_TRANSLATE_TYPE)0)); 5739 * sizeof (*(RE_TRANSLATE_TYPE)0));
5752 if (preg->translate == NULL) 5740 if (preg->translate == NULL)
5753 return (int) REG_ESPACE; 5741 return (int) REG_ESPACE;
5754 5742
5755 /* Map uppercase characters to corresponding lowercase ones. */ 5743 /* Map uppercase characters to corresponding lowercase ones. */
5756 for (i = 0; i < CHAR_SET_SIZE; i++) 5744 for (i = 0; i < CHAR_SET_SIZE; i++)
5757 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 5745 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5758 } 5746 }
5759 else 5747 else
5760 preg->translate = NULL; 5748 preg->translate = NULL;
5761 5749
5762 /* If REG_NEWLINE is set, newlines are treated differently. */ 5750 /* If REG_NEWLINE is set, newlines are treated differently. */
5826 { 5814 {
5827 regs.num_regs = nmatch; 5815 regs.num_regs = nmatch;
5828 regs.start = TALLOC (nmatch, regoff_t); 5816 regs.start = TALLOC (nmatch, regoff_t);
5829 regs.end = TALLOC (nmatch, regoff_t); 5817 regs.end = TALLOC (nmatch, regoff_t);
5830 if (regs.start == NULL || regs.end == NULL) 5818 if (regs.start == NULL || regs.end == NULL)
5831 return (int) REG_NOMATCH; 5819 return (int) REG_NOMATCH;
5832 } 5820 }
5833 5821
5834 /* Perform the searching operation. */ 5822 /* Perform the searching operation. */
5835 ret = re_search (&private_preg, string, len, 5823 ret = re_search (&private_preg, string, len,
5836 /* start: */ 0, /* range: */ len, 5824 /* start: */ 0, /* range: */ len,
5837 want_reg_info ? &regs : (struct re_registers *) 0); 5825 want_reg_info ? &regs : (struct re_registers *) 0);
5838 5826
5839 /* Copy the register information to the POSIX structure. */ 5827 /* Copy the register information to the POSIX structure. */
5840 if (want_reg_info) 5828 if (want_reg_info)
5841 { 5829 {
5842 if (ret >= 0) 5830 if (ret >= 0)
5843 { 5831 {
5844 unsigned r; 5832 unsigned r;
5845 5833
5846 for (r = 0; r < nmatch; r++) 5834 for (r = 0; r < nmatch; r++)
5847 { 5835 {
5848 pmatch[r].rm_so = regs.start[r]; 5836 pmatch[r].rm_so = regs.start[r];
5849 pmatch[r].rm_eo = regs.end[r]; 5837 pmatch[r].rm_eo = regs.end[r];
5850 } 5838 }
5851 } 5839 }
5852 5840
5853 /* If we needed the temporary register info, free the space now. */ 5841 /* If we needed the temporary register info, free the space now. */
5854 free (regs.start); 5842 free (regs.start);
5855 free (regs.end); 5843 free (regs.end);
5856 } 5844 }
5886 msg_size = strlen (msg) + 1; /* Includes the null. */ 5874 msg_size = strlen (msg) + 1; /* Includes the null. */
5887 5875
5888 if (errbuf_size != 0) 5876 if (errbuf_size != 0)
5889 { 5877 {
5890 if (msg_size > errbuf_size) 5878 if (msg_size > errbuf_size)
5891 { 5879 {
5892 strncpy (errbuf, msg, errbuf_size - 1); 5880 strncpy (errbuf, msg, errbuf_size - 1);
5893 errbuf[errbuf_size - 1] = 0; 5881 errbuf[errbuf_size - 1] = 0;
5894 } 5882 }
5895 else 5883 else
5896 strcpy (errbuf, msg); 5884 strcpy (errbuf, msg);
5897 } 5885 }
5898 5886
5899 return msg_size; 5887 return msg_size;
5900 } 5888 }
5901 5889