Mercurial > hg > octave-kai > gnulib-hg
annotate lib/fnmatch_loop.c @ 4739:04758f7475fd
Merge changes from glibc.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Fri, 26 Sep 2003 07:35:01 +0000 |
parents | 6ad23581140d |
children | c47569f6dfaa |
rev | line source |
---|---|
3914 | 1 /* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001, |
4502
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
2 2002, 2003 Free Software Foundation, Inc. |
3914 | 3 |
4 This program is free software; you can redistribute it and/or modify | |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2, or (at your option) | |
7 any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software Foundation, | |
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
17 | |
18 /* Match STRING against the filename pattern PATTERN, returning zero if | |
19 it matches, nonzero if not. */ | |
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string, | |
21 const CHAR *string_end, int no_leading_period, int flags) | |
22 internal_function; | |
23 static const CHAR *END (const CHAR *patternp) internal_function; | |
24 | |
25 static int | |
26 internal_function | |
4643 | 27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
28 int no_leading_period, int flags) | |
3914 | 29 { |
30 register const CHAR *p = pattern, *n = string; | |
31 register UCHAR c; | |
32 #ifdef _LIBC | |
33 # if WIDE_CHAR_VERSION | |
34 const char *collseq = (const char *) | |
35 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); | |
36 # else | |
37 const UCHAR *collseq = (const UCHAR *) | |
38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); | |
39 # endif | |
40 #endif | |
41 | |
42 while ((c = *p++) != L('\0')) | |
43 { | |
44 int new_no_leading_period = 0; | |
45 c = FOLD (c); | |
46 | |
47 switch (c) | |
48 { | |
49 case L('?'): | |
50 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
51 { | |
52 int res; | |
53 | |
54 res = EXT (c, p, n, string_end, no_leading_period, | |
55 flags); | |
56 if (res != -1) | |
57 return res; | |
58 } | |
59 | |
60 if (n == string_end) | |
61 return FNM_NOMATCH; | |
62 else if (*n == L('/') && (flags & FNM_FILE_NAME)) | |
63 return FNM_NOMATCH; | |
64 else if (*n == L('.') && no_leading_period) | |
65 return FNM_NOMATCH; | |
66 break; | |
67 | |
68 case L('\\'): | |
69 if (!(flags & FNM_NOESCAPE)) | |
70 { | |
71 c = *p++; | |
72 if (c == L('\0')) | |
73 /* Trailing \ loses. */ | |
74 return FNM_NOMATCH; | |
75 c = FOLD (c); | |
76 } | |
77 if (n == string_end || FOLD ((UCHAR) *n) != c) | |
78 return FNM_NOMATCH; | |
79 break; | |
80 | |
81 case L('*'): | |
82 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
83 { | |
84 int res; | |
85 | |
86 res = EXT (c, p, n, string_end, no_leading_period, | |
87 flags); | |
88 if (res != -1) | |
89 return res; | |
90 } | |
91 | |
92 if (n != string_end && *n == L('.') && no_leading_period) | |
93 return FNM_NOMATCH; | |
94 | |
95 for (c = *p++; c == L('?') || c == L('*'); c = *p++) | |
96 { | |
97 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0) | |
98 { | |
99 const CHAR *endp = END (p); | |
100 if (endp != p) | |
101 { | |
102 /* This is a pattern. Skip over it. */ | |
103 p = endp; | |
104 continue; | |
105 } | |
106 } | |
107 | |
108 if (c == L('?')) | |
109 { | |
110 /* A ? needs to match one character. */ | |
111 if (n == string_end) | |
112 /* There isn't another character; no match. */ | |
113 return FNM_NOMATCH; | |
114 else if (*n == L('/') | |
115 && __builtin_expect (flags & FNM_FILE_NAME, 0)) | |
116 /* A slash does not match a wildcard under | |
117 FNM_FILE_NAME. */ | |
118 return FNM_NOMATCH; | |
119 else | |
120 /* One character of the string is consumed in matching | |
121 this ? wildcard, so *??? won't match if there are | |
122 less than three characters. */ | |
123 ++n; | |
124 } | |
125 } | |
126 | |
127 if (c == L('\0')) | |
128 /* The wildcard(s) is/are the last element of the pattern. | |
129 If the name is a file name and contains another slash | |
130 this means it cannot match, unless the FNM_LEADING_DIR | |
131 flag is set. */ | |
132 { | |
133 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; | |
134 | |
135 if (flags & FNM_FILE_NAME) | |
136 { | |
137 if (flags & FNM_LEADING_DIR) | |
138 result = 0; | |
139 else | |
140 { | |
141 if (MEMCHR (n, L('/'), string_end - n) == NULL) | |
142 result = 0; | |
143 } | |
144 } | |
145 | |
146 return result; | |
147 } | |
148 else | |
149 { | |
150 const CHAR *endp; | |
151 | |
152 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), | |
153 string_end - n); | |
154 if (endp == NULL) | |
155 endp = string_end; | |
156 | |
157 if (c == L('[') | |
158 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 | |
159 && (c == L('@') || c == L('+') || c == L('!')) | |
160 && *p == L('('))) | |
161 { | |
162 int flags2 = ((flags & FNM_FILE_NAME) | |
163 ? flags : (flags & ~FNM_PERIOD)); | |
164 int no_leading_period2 = no_leading_period; | |
165 | |
166 for (--p; n < endp; ++n, no_leading_period2 = 0) | |
167 if (FCT (p, n, string_end, no_leading_period2, flags2) | |
168 == 0) | |
169 return 0; | |
170 } | |
171 else if (c == L('/') && (flags & FNM_FILE_NAME)) | |
172 { | |
173 while (n < string_end && *n != L('/')) | |
174 ++n; | |
175 if (n < string_end && *n == L('/') | |
176 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) | |
177 == 0)) | |
178 return 0; | |
179 } | |
180 else | |
181 { | |
182 int flags2 = ((flags & FNM_FILE_NAME) | |
183 ? flags : (flags & ~FNM_PERIOD)); | |
184 int no_leading_period2 = no_leading_period; | |
185 | |
186 if (c == L('\\') && !(flags & FNM_NOESCAPE)) | |
187 c = *p; | |
188 c = FOLD (c); | |
189 for (--p; n < endp; ++n, no_leading_period2 = 0) | |
190 if (FOLD ((UCHAR) *n) == c | |
191 && (FCT (p, n, string_end, no_leading_period2, flags2) | |
192 == 0)) | |
193 return 0; | |
194 } | |
195 } | |
196 | |
197 /* If we come here no match is possible with the wildcard. */ | |
198 return FNM_NOMATCH; | |
199 | |
200 case L('['): | |
201 { | |
202 /* Nonzero if the sense of the character class is inverted. */ | |
203 register int not; | |
204 CHAR cold; | |
205 UCHAR fn; | |
206 | |
207 if (posixly_correct == 0) | |
208 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
209 | |
210 if (n == string_end) | |
211 return FNM_NOMATCH; | |
212 | |
213 if (*n == L('.') && no_leading_period) | |
214 return FNM_NOMATCH; | |
215 | |
216 if (*n == L('/') && (flags & FNM_FILE_NAME)) | |
217 /* `/' cannot be matched. */ | |
218 return FNM_NOMATCH; | |
219 | |
220 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); | |
221 if (not) | |
222 ++p; | |
223 | |
224 fn = FOLD ((UCHAR) *n); | |
225 | |
226 c = *p++; | |
227 for (;;) | |
228 { | |
229 if (!(flags & FNM_NOESCAPE) && c == L('\\')) | |
230 { | |
231 if (*p == L('\0')) | |
232 return FNM_NOMATCH; | |
233 c = FOLD ((UCHAR) *p); | |
234 ++p; | |
235 | |
236 if (c == fn) | |
237 goto matched; | |
238 } | |
239 else if (c == L('[') && *p == L(':')) | |
240 { | |
241 /* Leave room for the null. */ | |
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; | |
243 size_t c1 = 0; | |
3920
678b6b899b7c
(WIDE_CHAR_SUPPORT): New macro. Use it uniformly instead of
Jim Meyering <jim@meyering.net>
parents:
3914
diff
changeset
|
244 #if defined _LIBC || WIDE_CHAR_SUPPORT |
3914 | 245 wctype_t wt; |
246 #endif | |
247 const CHAR *startp = p; | |
248 | |
249 for (;;) | |
250 { | |
251 if (c1 == CHAR_CLASS_MAX_LENGTH) | |
252 /* The name is too long and therefore the pattern | |
253 is ill-formed. */ | |
254 return FNM_NOMATCH; | |
255 | |
256 c = *++p; | |
257 if (c == L(':') && p[1] == L(']')) | |
258 { | |
259 p += 2; | |
260 break; | |
261 } | |
262 if (c < L('a') || c >= L('z')) | |
263 { | |
264 /* This cannot possibly be a character class name. | |
265 Match it as a normal range. */ | |
266 p = startp; | |
267 c = L('['); | |
268 goto normal_bracket; | |
269 } | |
270 str[c1++] = c; | |
271 } | |
272 str[c1] = L('\0'); | |
273 | |
3920
678b6b899b7c
(WIDE_CHAR_SUPPORT): New macro. Use it uniformly instead of
Jim Meyering <jim@meyering.net>
parents:
3914
diff
changeset
|
274 #if defined _LIBC || WIDE_CHAR_SUPPORT |
3914 | 275 wt = IS_CHAR_CLASS (str); |
276 if (wt == 0) | |
277 /* Invalid character class name. */ | |
278 return FNM_NOMATCH; | |
279 | |
280 # if defined _LIBC && ! WIDE_CHAR_VERSION | |
281 /* The following code is glibc specific but does | |
282 there a good job in speeding up the code since | |
283 we can avoid the btowc() call. */ | |
284 if (_ISCTYPE ((UCHAR) *n, wt)) | |
285 goto matched; | |
286 # else | |
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) | |
288 goto matched; | |
289 # endif | |
290 #else | |
291 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n)) | |
292 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n)) | |
293 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n)) | |
294 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n)) | |
295 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n)) | |
296 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n)) | |
297 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n)) | |
298 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n)) | |
299 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n)) | |
300 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n)) | |
301 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n)) | |
302 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n))) | |
303 goto matched; | |
304 #endif | |
305 c = *p++; | |
306 } | |
307 #ifdef _LIBC | |
308 else if (c == L('[') && *p == L('=')) | |
309 { | |
310 UCHAR str[1]; | |
311 uint32_t nrules = | |
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
313 const CHAR *startp = p; | |
314 | |
315 c = *++p; | |
316 if (c == L('\0')) | |
317 { | |
318 p = startp; | |
319 c = L('['); | |
320 goto normal_bracket; | |
321 } | |
322 str[0] = c; | |
323 | |
324 c = *++p; | |
325 if (c != L('=') || p[1] != L(']')) | |
326 { | |
327 p = startp; | |
328 c = L('['); | |
329 goto normal_bracket; | |
330 } | |
331 p += 2; | |
332 | |
333 if (nrules == 0) | |
334 { | |
335 if ((UCHAR) *n == str[0]) | |
336 goto matched; | |
337 } | |
338 else | |
339 { | |
340 const int32_t *table; | |
341 # if WIDE_CHAR_VERSION | |
342 const int32_t *weights; | |
343 const int32_t *extra; | |
344 # else | |
345 const unsigned char *weights; | |
346 const unsigned char *extra; | |
347 # endif | |
348 const int32_t *indirect; | |
349 int32_t idx; | |
350 const UCHAR *cp = (const UCHAR *) str; | |
351 | |
352 /* This #include defines a local function! */ | |
353 # if WIDE_CHAR_VERSION | |
354 # include <locale/weightwc.h> | |
355 # else | |
356 # include <locale/weight.h> | |
357 # endif | |
358 | |
359 # if WIDE_CHAR_VERSION | |
360 table = (const int32_t *) | |
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); | |
362 weights = (const int32_t *) | |
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); | |
364 extra = (const int32_t *) | |
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); | |
366 indirect = (const int32_t *) | |
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); | |
368 # else | |
369 table = (const int32_t *) | |
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); | |
371 weights = (const unsigned char *) | |
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); | |
373 extra = (const unsigned char *) | |
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); | |
375 indirect = (const int32_t *) | |
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); | |
377 # endif | |
378 | |
379 idx = findidx (&cp); | |
380 if (idx != 0) | |
381 { | |
382 /* We found a table entry. Now see whether the | |
383 character we are currently at has the same | |
384 equivalance class value. */ | |
385 int len = weights[idx]; | |
386 int32_t idx2; | |
387 const UCHAR *np = (const UCHAR *) n; | |
388 | |
389 idx2 = findidx (&np); | |
390 if (idx2 != 0 && len == weights[idx2]) | |
391 { | |
392 int cnt = 0; | |
393 | |
394 while (cnt < len | |
395 && (weights[idx + 1 + cnt] | |
396 == weights[idx2 + 1 + cnt])) | |
397 ++cnt; | |
398 | |
399 if (cnt == len) | |
400 goto matched; | |
401 } | |
402 } | |
403 } | |
404 | |
405 c = *p++; | |
406 } | |
407 #endif | |
408 else if (c == L('\0')) | |
409 /* [ (unterminated) loses. */ | |
410 return FNM_NOMATCH; | |
411 else | |
412 { | |
413 int is_range = 0; | |
414 | |
415 #ifdef _LIBC | |
416 int is_seqval = 0; | |
417 | |
418 if (c == L('[') && *p == L('.')) | |
419 { | |
420 uint32_t nrules = | |
421 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
422 const CHAR *startp = p; | |
423 size_t c1 = 0; | |
424 | |
425 while (1) | |
426 { | |
427 c = *++p; | |
428 if (c == L('.') && p[1] == L(']')) | |
429 { | |
430 p += 2; | |
431 break; | |
432 } | |
433 if (c == '\0') | |
434 return FNM_NOMATCH; | |
435 ++c1; | |
436 } | |
437 | |
438 /* We have to handling the symbols differently in | |
439 ranges since then the collation sequence is | |
440 important. */ | |
441 is_range = *p == L('-') && p[1] != L('\0'); | |
442 | |
443 if (nrules == 0) | |
444 { | |
445 /* There are no names defined in the collation | |
446 data. Therefore we only accept the trivial | |
447 names consisting of the character itself. */ | |
448 if (c1 != 1) | |
449 return FNM_NOMATCH; | |
450 | |
451 if (!is_range && *n == startp[1]) | |
452 goto matched; | |
453 | |
454 cold = startp[1]; | |
455 c = *p++; | |
456 } | |
457 else | |
458 { | |
459 int32_t table_size; | |
460 const int32_t *symb_table; | |
461 # ifdef WIDE_CHAR_VERSION | |
462 char str[c1]; | |
463 unsigned int strcnt; | |
464 # else | |
465 # define str (startp + 1) | |
466 # endif | |
467 const unsigned char *extra; | |
468 int32_t idx; | |
469 int32_t elem; | |
470 int32_t second; | |
471 int32_t hash; | |
472 | |
473 # ifdef WIDE_CHAR_VERSION | |
474 /* We have to convert the name to a single-byte | |
475 string. This is possible since the names | |
476 consist of ASCII characters and the internal | |
477 representation is UCS4. */ | |
478 for (strcnt = 0; strcnt < c1; ++strcnt) | |
479 str[strcnt] = startp[1 + strcnt]; | |
480 # endif | |
481 | |
482 table_size = | |
483 _NL_CURRENT_WORD (LC_COLLATE, | |
484 _NL_COLLATE_SYMB_HASH_SIZEMB); | |
485 symb_table = (const int32_t *) | |
486 _NL_CURRENT (LC_COLLATE, | |
487 _NL_COLLATE_SYMB_TABLEMB); | |
488 extra = (const unsigned char *) | |
489 _NL_CURRENT (LC_COLLATE, | |
490 _NL_COLLATE_SYMB_EXTRAMB); | |
491 | |
492 /* Locate the character in the hashing table. */ | |
493 hash = elem_hash (str, c1); | |
494 | |
495 idx = 0; | |
496 elem = hash % table_size; | |
497 second = hash % (table_size - 2); | |
498 while (symb_table[2 * elem] != 0) | |
499 { | |
500 /* First compare the hashing value. */ | |
501 if (symb_table[2 * elem] == hash | |
502 && c1 == extra[symb_table[2 * elem + 1]] | |
503 && memcmp (str, | |
504 &extra[symb_table[2 * elem + 1] | |
505 + 1], c1) == 0) | |
506 { | |
507 /* Yep, this is the entry. */ | |
508 idx = symb_table[2 * elem + 1]; | |
509 idx += 1 + extra[idx]; | |
510 break; | |
511 } | |
512 | |
513 /* Next entry. */ | |
514 elem += second; | |
515 } | |
516 | |
517 if (symb_table[2 * elem] != 0) | |
518 { | |
519 /* Compare the byte sequence but only if | |
520 this is not part of a range. */ | |
521 # ifdef WIDE_CHAR_VERSION | |
522 int32_t *wextra; | |
523 | |
524 idx += 1 + extra[idx]; | |
525 /* Adjust for the alignment. */ | |
526 idx = (idx + 3) & ~3; | |
527 | |
528 wextra = (int32_t *) &extra[idx + 4]; | |
529 # endif | |
530 | |
531 if (! is_range) | |
532 { | |
533 # ifdef WIDE_CHAR_VERSION | |
534 for (c1 = 0; c1 < wextra[idx]; ++c1) | |
535 if (n[c1] != wextra[1 + c1]) | |
536 break; | |
537 | |
538 if (c1 == wextra[idx]) | |
539 goto matched; | |
540 # else | |
541 for (c1 = 0; c1 < extra[idx]; ++c1) | |
542 if (n[c1] != extra[1 + c1]) | |
543 break; | |
544 | |
545 if (c1 == extra[idx]) | |
546 goto matched; | |
547 # endif | |
548 } | |
549 | |
550 /* Get the collation sequence value. */ | |
551 is_seqval = 1; | |
552 # ifdef WIDE_CHAR_VERSION | |
553 cold = wextra[1 + wextra[idx]]; | |
554 # else | |
555 /* Adjust for the alignment. */ | |
556 idx += 1 + extra[idx]; | |
557 idx = (idx + 3) & ~4; | |
558 cold = *((int32_t *) &extra[idx]); | |
559 # endif | |
560 | |
561 c = *p++; | |
562 } | |
563 else if (c1 == 1) | |
564 { | |
565 /* No valid character. Match it as a | |
566 single byte. */ | |
567 if (!is_range && *n == str[0]) | |
568 goto matched; | |
569 | |
570 cold = str[0]; | |
571 c = *p++; | |
572 } | |
573 else | |
574 return FNM_NOMATCH; | |
575 } | |
576 } | |
577 else | |
578 # undef str | |
579 #endif | |
580 { | |
581 c = FOLD (c); | |
582 normal_bracket: | |
583 | |
584 /* We have to handling the symbols differently in | |
585 ranges since then the collation sequence is | |
586 important. */ | |
587 is_range = (*p == L('-') && p[1] != L('\0') | |
588 && p[1] != L(']')); | |
589 | |
590 if (!is_range && c == fn) | |
591 goto matched; | |
592 | |
593 cold = c; | |
594 c = *p++; | |
595 } | |
596 | |
597 if (c == L('-') && *p != L(']')) | |
598 { | |
599 #if _LIBC | |
600 /* We have to find the collation sequence | |
601 value for C. Collation sequence is nothing | |
602 we can regularly access. The sequence | |
603 value is defined by the order in which the | |
604 definitions of the collation values for the | |
605 various characters appear in the source | |
606 file. A strange concept, nowhere | |
607 documented. */ | |
608 uint32_t fcollseq; | |
609 uint32_t lcollseq; | |
610 UCHAR cend = *p++; | |
611 | |
612 # ifdef WIDE_CHAR_VERSION | |
613 /* Search in the `names' array for the characters. */ | |
614 fcollseq = collseq_table_lookup (collseq, fn); | |
615 if (fcollseq == ~((uint32_t) 0)) | |
616 /* XXX We don't know anything about the character | |
617 we are supposed to match. This means we are | |
618 failing. */ | |
619 goto range_not_matched; | |
620 | |
621 if (is_seqval) | |
622 lcollseq = cold; | |
623 else | |
624 lcollseq = collseq_table_lookup (collseq, cold); | |
625 # else | |
626 fcollseq = collseq[fn]; | |
627 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; | |
628 # endif | |
629 | |
630 is_seqval = 0; | |
631 if (cend == L('[') && *p == L('.')) | |
632 { | |
633 uint32_t nrules = | |
634 _NL_CURRENT_WORD (LC_COLLATE, | |
635 _NL_COLLATE_NRULES); | |
636 const CHAR *startp = p; | |
637 size_t c1 = 0; | |
638 | |
639 while (1) | |
640 { | |
641 c = *++p; | |
642 if (c == L('.') && p[1] == L(']')) | |
643 { | |
644 p += 2; | |
645 break; | |
646 } | |
647 if (c == '\0') | |
648 return FNM_NOMATCH; | |
649 ++c1; | |
650 } | |
651 | |
652 if (nrules == 0) | |
653 { | |
654 /* There are no names defined in the | |
655 collation data. Therefore we only | |
656 accept the trivial names consisting | |
657 of the character itself. */ | |
658 if (c1 != 1) | |
659 return FNM_NOMATCH; | |
660 | |
661 cend = startp[1]; | |
662 } | |
663 else | |
664 { | |
665 int32_t table_size; | |
666 const int32_t *symb_table; | |
667 # ifdef WIDE_CHAR_VERSION | |
668 char str[c1]; | |
669 unsigned int strcnt; | |
670 # else | |
671 # define str (startp + 1) | |
672 # endif | |
673 const unsigned char *extra; | |
674 int32_t idx; | |
675 int32_t elem; | |
676 int32_t second; | |
677 int32_t hash; | |
678 | |
679 # ifdef WIDE_CHAR_VERSION | |
680 /* We have to convert the name to a single-byte | |
681 string. This is possible since the names | |
682 consist of ASCII characters and the internal | |
683 representation is UCS4. */ | |
684 for (strcnt = 0; strcnt < c1; ++strcnt) | |
685 str[strcnt] = startp[1 + strcnt]; | |
686 # endif | |
687 | |
688 table_size = | |
689 _NL_CURRENT_WORD (LC_COLLATE, | |
690 _NL_COLLATE_SYMB_HASH_SIZEMB); | |
691 symb_table = (const int32_t *) | |
692 _NL_CURRENT (LC_COLLATE, | |
693 _NL_COLLATE_SYMB_TABLEMB); | |
694 extra = (const unsigned char *) | |
695 _NL_CURRENT (LC_COLLATE, | |
696 _NL_COLLATE_SYMB_EXTRAMB); | |
697 | |
698 /* Locate the character in the hashing | |
699 table. */ | |
700 hash = elem_hash (str, c1); | |
701 | |
702 idx = 0; | |
703 elem = hash % table_size; | |
704 second = hash % (table_size - 2); | |
705 while (symb_table[2 * elem] != 0) | |
706 { | |
707 /* First compare the hashing value. */ | |
708 if (symb_table[2 * elem] == hash | |
709 && (c1 | |
710 == extra[symb_table[2 * elem + 1]]) | |
711 && memcmp (str, | |
712 &extra[symb_table[2 * elem + 1] | |
713 + 1], c1) == 0) | |
714 { | |
715 /* Yep, this is the entry. */ | |
716 idx = symb_table[2 * elem + 1]; | |
717 idx += 1 + extra[idx]; | |
718 break; | |
719 } | |
720 | |
721 /* Next entry. */ | |
722 elem += second; | |
723 } | |
724 | |
725 if (symb_table[2 * elem] != 0) | |
726 { | |
727 /* Compare the byte sequence but only if | |
728 this is not part of a range. */ | |
729 # ifdef WIDE_CHAR_VERSION | |
730 int32_t *wextra; | |
731 | |
732 idx += 1 + extra[idx]; | |
733 /* Adjust for the alignment. */ | |
734 idx = (idx + 3) & ~4; | |
735 | |
736 wextra = (int32_t *) &extra[idx + 4]; | |
737 # endif | |
738 /* Get the collation sequence value. */ | |
739 is_seqval = 1; | |
740 # ifdef WIDE_CHAR_VERSION | |
741 cend = wextra[1 + wextra[idx]]; | |
742 # else | |
743 /* Adjust for the alignment. */ | |
744 idx += 1 + extra[idx]; | |
745 idx = (idx + 3) & ~4; | |
746 cend = *((int32_t *) &extra[idx]); | |
747 # endif | |
748 } | |
749 else if (symb_table[2 * elem] != 0 && c1 == 1) | |
750 { | |
751 cend = str[0]; | |
752 c = *p++; | |
753 } | |
754 else | |
755 return FNM_NOMATCH; | |
756 } | |
757 # undef str | |
758 } | |
759 else | |
760 { | |
761 if (!(flags & FNM_NOESCAPE) && cend == L('\\')) | |
762 cend = *p++; | |
763 if (cend == L('\0')) | |
764 return FNM_NOMATCH; | |
765 cend = FOLD (cend); | |
766 } | |
767 | |
768 /* XXX It is not entirely clear to me how to handle | |
769 characters which are not mentioned in the | |
770 collation specification. */ | |
771 if ( | |
772 # ifdef WIDE_CHAR_VERSION | |
773 lcollseq == 0xffffffff || | |
774 # endif | |
775 lcollseq <= fcollseq) | |
776 { | |
777 /* We have to look at the upper bound. */ | |
778 uint32_t hcollseq; | |
779 | |
780 if (is_seqval) | |
781 hcollseq = cend; | |
782 else | |
783 { | |
784 # ifdef WIDE_CHAR_VERSION | |
785 hcollseq = | |
786 collseq_table_lookup (collseq, cend); | |
787 if (hcollseq == ~((uint32_t) 0)) | |
788 { | |
789 /* Hum, no information about the upper | |
790 bound. The matching succeeds if the | |
791 lower bound is matched exactly. */ | |
792 if (lcollseq != fcollseq) | |
793 goto range_not_matched; | |
794 | |
795 goto matched; | |
796 } | |
797 # else | |
798 hcollseq = collseq[cend]; | |
799 # endif | |
800 } | |
801 | |
802 if (lcollseq <= hcollseq && fcollseq <= hcollseq) | |
803 goto matched; | |
804 } | |
805 # ifdef WIDE_CHAR_VERSION | |
806 range_not_matched: | |
807 # endif | |
808 #else | |
809 /* We use a boring value comparison of the character | |
810 values. This is better than comparing using | |
811 `strcoll' since the latter would have surprising | |
812 and sometimes fatal consequences. */ | |
813 UCHAR cend = *p++; | |
814 | |
815 if (!(flags & FNM_NOESCAPE) && cend == L('\\')) | |
816 cend = *p++; | |
817 if (cend == L('\0')) | |
818 return FNM_NOMATCH; | |
819 | |
820 /* It is a range. */ | |
821 if (cold <= fn && fn <= cend) | |
822 goto matched; | |
823 #endif | |
824 | |
825 c = *p++; | |
826 } | |
827 } | |
828 | |
829 if (c == L(']')) | |
830 break; | |
831 } | |
832 | |
833 if (!not) | |
834 return FNM_NOMATCH; | |
835 break; | |
836 | |
837 matched: | |
838 /* Skip the rest of the [...] that already matched. */ | |
839 do | |
840 { | |
841 ignore_next: | |
842 c = *p++; | |
843 | |
844 if (c == L('\0')) | |
845 /* [... (unterminated) loses. */ | |
846 return FNM_NOMATCH; | |
847 | |
848 if (!(flags & FNM_NOESCAPE) && c == L('\\')) | |
849 { | |
850 if (*p == L('\0')) | |
851 return FNM_NOMATCH; | |
852 /* XXX 1003.2d11 is unclear if this is right. */ | |
853 ++p; | |
854 } | |
855 else if (c == L('[') && *p == L(':')) | |
856 { | |
857 int c1 = 0; | |
858 const CHAR *startp = p; | |
859 | |
860 while (1) | |
861 { | |
862 c = *++p; | |
863 if (++c1 == CHAR_CLASS_MAX_LENGTH) | |
864 return FNM_NOMATCH; | |
865 | |
866 if (*p == L(':') && p[1] == L(']')) | |
867 break; | |
868 | |
869 if (c < L('a') || c >= L('z')) | |
870 { | |
871 p = startp; | |
872 goto ignore_next; | |
873 } | |
874 } | |
875 p += 2; | |
876 c = *p++; | |
877 } | |
878 else if (c == L('[') && *p == L('=')) | |
879 { | |
880 c = *++p; | |
881 if (c == L('\0')) | |
882 return FNM_NOMATCH; | |
883 c = *++p; | |
884 if (c != L('=') || p[1] != L(']')) | |
885 return FNM_NOMATCH; | |
886 p += 2; | |
887 c = *p++; | |
888 } | |
889 else if (c == L('[') && *p == L('.')) | |
890 { | |
891 ++p; | |
892 while (1) | |
893 { | |
894 c = *++p; | |
895 if (c == '\0') | |
896 return FNM_NOMATCH; | |
897 | |
898 if (*p == L('.') && p[1] == L(']')) | |
899 break; | |
900 } | |
901 p += 2; | |
902 c = *p++; | |
903 } | |
904 } | |
905 while (c != L(']')); | |
906 if (not) | |
907 return FNM_NOMATCH; | |
908 } | |
909 break; | |
910 | |
911 case L('+'): | |
912 case L('@'): | |
913 case L('!'): | |
914 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
915 { | |
916 int res; | |
917 | |
918 res = EXT (c, p, n, string_end, no_leading_period, flags); | |
919 if (res != -1) | |
920 return res; | |
921 } | |
922 goto normal_match; | |
923 | |
924 case L('/'): | |
925 if (NO_LEADING_PERIOD (flags)) | |
926 { | |
927 if (n == string_end || c != *n) | |
928 return FNM_NOMATCH; | |
929 | |
930 new_no_leading_period = 1; | |
931 break; | |
932 } | |
933 /* FALLTHROUGH */ | |
934 default: | |
935 normal_match: | |
936 if (n == string_end || c != FOLD ((UCHAR) *n)) | |
937 return FNM_NOMATCH; | |
938 } | |
939 | |
940 no_leading_period = new_no_leading_period; | |
941 ++n; | |
942 } | |
943 | |
944 if (n == string_end) | |
945 return 0; | |
946 | |
947 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/')) | |
948 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | |
949 return 0; | |
950 | |
951 return FNM_NOMATCH; | |
952 } | |
953 | |
954 | |
955 static const CHAR * | |
956 internal_function | |
957 END (const CHAR *pattern) | |
958 { | |
959 const CHAR *p = pattern; | |
960 | |
961 while (1) | |
962 if (*++p == L('\0')) | |
963 /* This is an invalid pattern. */ | |
964 return pattern; | |
965 else if (*p == L('[')) | |
966 { | |
967 /* Handle brackets special. */ | |
968 if (posixly_correct == 0) | |
969 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
970 | |
971 /* Skip the not sign. We have to recognize it because of a possibly | |
972 following ']'. */ | |
973 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) | |
974 ++p; | |
975 /* A leading ']' is recognized as such. */ | |
976 if (*p == L(']')) | |
977 ++p; | |
978 /* Skip over all characters of the list. */ | |
979 while (*p != L(']')) | |
980 if (*p++ == L('\0')) | |
981 /* This is no valid pattern. */ | |
982 return pattern; | |
983 } | |
984 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') | |
985 || *p == L('!')) && p[1] == L('(')) | |
986 p = END (p + 1); | |
987 else if (*p == L(')')) | |
988 break; | |
989 | |
990 return p + 1; | |
991 } | |
992 | |
993 | |
994 static int | |
995 internal_function | |
996 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
997 int no_leading_period, int flags) | |
998 { | |
999 const CHAR *startp; | |
1000 int level; | |
1001 struct patternlist | |
1002 { | |
1003 struct patternlist *next; | |
1004 CHAR str[1]; | |
1005 } *list = NULL; | |
1006 struct patternlist **lastp = &list; | |
1007 size_t pattern_len = STRLEN (pattern); | |
1008 const CHAR *p; | |
1009 const CHAR *rs; | |
1010 | |
1011 /* Parse the pattern. Store the individual parts in the list. */ | |
1012 level = 0; | |
1013 for (startp = p = pattern + 1; level >= 0; ++p) | |
1014 if (*p == L('\0')) | |
1015 /* This is an invalid pattern. */ | |
1016 return -1; | |
1017 else if (*p == L('[')) | |
1018 { | |
1019 /* Handle brackets special. */ | |
1020 if (posixly_correct == 0) | |
1021 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
1022 | |
1023 /* Skip the not sign. We have to recognize it because of a possibly | |
1024 following ']'. */ | |
1025 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) | |
1026 ++p; | |
1027 /* A leading ']' is recognized as such. */ | |
1028 if (*p == L(']')) | |
1029 ++p; | |
1030 /* Skip over all characters of the list. */ | |
1031 while (*p != L(']')) | |
1032 if (*p++ == L('\0')) | |
1033 /* This is no valid pattern. */ | |
1034 return -1; | |
1035 } | |
1036 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') | |
1037 || *p == L('!')) && p[1] == L('(')) | |
1038 /* Remember the nesting level. */ | |
1039 ++level; | |
1040 else if (*p == L(')')) | |
1041 { | |
1042 if (level-- == 0) | |
1043 { | |
1044 /* This means we found the end of the pattern. */ | |
1045 #define NEW_PATTERN \ | |
1046 struct patternlist *newp; \ | |
4502
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1047 size_t plen; \ |
3914 | 1048 \ |
4502
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1049 plen = (opt == L('?') || opt == L('@') \ |
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1050 ? pattern_len \ |
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1051 : p - startp + 1); \ |
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1052 newp = (struct patternlist *) \ |
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1053 alloca (offsetof (struct patternlist, str) \ |
83b0c8b8ce8f
(NEW_PATTERN): Cast alloca return value to proper type.
Paul Eggert <eggert@cs.ucla.edu>
parents:
3920
diff
changeset
|
1054 + (plen * sizeof (CHAR))); \ |
3914 | 1055 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \ |
1056 newp->next = NULL; \ | |
1057 *lastp = newp; \ | |
1058 lastp = &newp->next | |
1059 NEW_PATTERN; | |
1060 } | |
1061 } | |
1062 else if (*p == L('|')) | |
1063 { | |
1064 if (level == 0) | |
1065 { | |
1066 NEW_PATTERN; | |
1067 startp = p + 1; | |
1068 } | |
1069 } | |
1070 assert (list != NULL); | |
1071 assert (p[-1] == L(')')); | |
1072 #undef NEW_PATTERN | |
1073 | |
1074 switch (opt) | |
1075 { | |
1076 case L('*'): | |
1077 if (FCT (p, string, string_end, no_leading_period, flags) == 0) | |
1078 return 0; | |
1079 /* FALLTHROUGH */ | |
1080 | |
1081 case L('+'): | |
1082 do | |
1083 { | |
1084 for (rs = string; rs <= string_end; ++rs) | |
1085 /* First match the prefix with the current pattern with the | |
1086 current pattern. */ | |
1087 if (FCT (list->str, string, rs, no_leading_period, | |
1088 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 | |
1089 /* This was successful. Now match the rest with the rest | |
1090 of the pattern. */ | |
1091 && (FCT (p, rs, string_end, | |
1092 rs == string | |
1093 ? no_leading_period | |
1094 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, | |
1095 flags & FNM_FILE_NAME | |
1096 ? flags : flags & ~FNM_PERIOD) == 0 | |
1097 /* This didn't work. Try the whole pattern. */ | |
1098 || (rs != string | |
1099 && FCT (pattern - 1, rs, string_end, | |
1100 rs == string | |
1101 ? no_leading_period | |
1102 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags) | |
1103 ? 1 : 0), | |
1104 flags & FNM_FILE_NAME | |
1105 ? flags : flags & ~FNM_PERIOD) == 0))) | |
1106 /* It worked. Signal success. */ | |
1107 return 0; | |
1108 } | |
1109 while ((list = list->next) != NULL); | |
1110 | |
1111 /* None of the patterns lead to a match. */ | |
1112 return FNM_NOMATCH; | |
1113 | |
1114 case L('?'): | |
1115 if (FCT (p, string, string_end, no_leading_period, flags) == 0) | |
1116 return 0; | |
1117 /* FALLTHROUGH */ | |
1118 | |
1119 case L('@'): | |
1120 do | |
1121 /* I cannot believe it but `strcat' is actually acceptable | |
1122 here. Match the entire string with the prefix from the | |
1123 pattern list and the rest of the pattern following the | |
1124 pattern list. */ | |
1125 if (FCT (STRCAT (list->str, p), string, string_end, | |
1126 no_leading_period, | |
1127 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) | |
1128 /* It worked. Signal success. */ | |
1129 return 0; | |
1130 while ((list = list->next) != NULL); | |
1131 | |
1132 /* None of the patterns lead to a match. */ | |
1133 return FNM_NOMATCH; | |
1134 | |
1135 case L('!'): | |
1136 for (rs = string; rs <= string_end; ++rs) | |
1137 { | |
1138 struct patternlist *runp; | |
1139 | |
1140 for (runp = list; runp != NULL; runp = runp->next) | |
1141 if (FCT (runp->str, string, rs, no_leading_period, | |
1142 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) | |
1143 break; | |
1144 | |
1145 /* If none of the patterns matched see whether the rest does. */ | |
1146 if (runp == NULL | |
1147 && (FCT (p, rs, string_end, | |
1148 rs == string | |
1149 ? no_leading_period | |
1150 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, | |
1151 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) | |
1152 == 0)) | |
1153 /* This is successful. */ | |
1154 return 0; | |
1155 } | |
1156 | |
1157 /* None of the patterns together with the rest of the pattern | |
1158 lead to a match. */ | |
1159 return FNM_NOMATCH; | |
1160 | |
1161 default: | |
1162 assert (! "Invalid extended matching operator"); | |
1163 break; | |
1164 } | |
1165 | |
1166 return -1; | |
1167 } | |
1168 | |
1169 | |
1170 #undef FOLD | |
1171 #undef CHAR | |
1172 #undef UCHAR | |
1173 #undef INT | |
1174 #undef FCT | |
1175 #undef EXT | |
1176 #undef END | |
1177 #undef MEMPCPY | |
1178 #undef MEMCHR | |
1179 #undef STRCOLL | |
1180 #undef STRLEN | |
1181 #undef STRCAT | |
1182 #undef L | |
1183 #undef BTOWC |