Mercurial > hg > octave-kai > gnulib-hg
annotate lib/regcomp.c @ 6170:a10e4460ad4c
* lib/regex_internal.h (re_sub_match_top_t): Remove unused member
next_last_offset.
(struct re_dfa_t): Remove unused member states_alloc.
* lib/regcomp.c (init_dfa): Don't initialize unused members.
* config/srclist.txt: Add glibc bug 1273.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Wed, 31 Aug 2005 18:08:34 +0000 |
parents | c3bf2ea44695 |
children | 5862ee08bfc1 |
rev | line source |
---|---|
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1 /* Extended regular expression matching and search library. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
3 This file is part of the GNU C Library. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
5 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
6 This program is free software; you can redistribute it and/or modify |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
7 it under the terms of the GNU General Public License as published by |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
8 the Free Software Foundation; either version 2, or (at your option) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
9 any later version. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
10 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
11 This program is distributed in the hope that it will be useful, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
14 GNU General Public License for more details. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
15 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
16 You should have received a copy of the GNU General Public License along |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
17 with this program; if not, write to the Free Software Foundation, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
19 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
20 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
21 int length, reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
22 static void re_compile_fastmap_iter (regex_t *bufp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
23 const re_dfastate_t *init_state, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
24 char *fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
25 static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
26 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
27 static void free_charset (re_charset_t *cset); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
28 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
29 static void free_workarea_compile (regex_t *preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
30 static reg_errcode_t create_initial_state (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
31 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
32 static void optimize_utf8 (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
33 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
34 static reg_errcode_t analyze (regex_t *preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
35 static reg_errcode_t preorder (bin_tree_t *root, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
36 reg_errcode_t (fn (void *, bin_tree_t *)), |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
37 void *extra); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
38 static reg_errcode_t postorder (bin_tree_t *root, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
39 reg_errcode_t (fn (void *, bin_tree_t *)), |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
40 void *extra); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
41 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
42 static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
43 static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
44 bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
45 static reg_errcode_t calc_first (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
46 static reg_errcode_t calc_next (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
47 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); |
6071
c1760162e42f
(duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5972
diff
changeset
|
48 static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
49 static int search_duplicated_node (re_dfa_t *dfa, int org_node, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
50 unsigned int constraint); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
51 static reg_errcode_t calc_eclosure (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
52 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
53 int node, int root); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
54 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
55 static int fetch_number (re_string_t *input, re_token_t *token, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
56 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
57 static int peek_token (re_token_t *token, re_string_t *input, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
58 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
59 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
60 reg_syntax_t syntax, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
61 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
62 re_token_t *token, reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
63 int nest, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
64 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
65 re_token_t *token, reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
66 int nest, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
67 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
68 re_token_t *token, reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
69 int nest, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
70 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
71 re_token_t *token, reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
72 int nest, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
73 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
74 re_dfa_t *dfa, re_token_t *token, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
75 reg_syntax_t syntax, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
76 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
77 re_token_t *token, reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
78 reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
79 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
80 re_string_t *regexp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
81 re_token_t *token, int token_len, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
82 re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
83 reg_syntax_t syntax, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
84 int accept_hyphen); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
85 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
86 re_string_t *regexp, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
87 re_token_t *token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
88 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
89 static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
90 re_charset_t *mbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
91 int *equiv_class_alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
92 const unsigned char *name); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
93 static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
94 re_bitset_ptr_t sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
95 re_charset_t *mbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
96 int *char_class_alloc, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
97 const unsigned char *class_name, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
98 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
99 #else /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
100 static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
101 const unsigned char *name); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
102 static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
103 re_bitset_ptr_t sbcset, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
104 const unsigned char *class_name, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
105 reg_syntax_t syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
106 #endif /* not RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
107 static bin_tree_t *build_charclass_op (re_dfa_t *dfa, |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
108 unsigned REG_TRANSLATE_TYPE trans, |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
109 const unsigned char *class_name, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
110 const unsigned char *extra, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
111 int non_match, reg_errcode_t *err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
112 static bin_tree_t *create_tree (re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
113 bin_tree_t *left, bin_tree_t *right, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
114 re_token_type_t type); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
115 static bin_tree_t *create_token_tree (re_dfa_t *dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
116 bin_tree_t *left, bin_tree_t *right, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
117 const re_token_t *token); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
118 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
119 static void free_token (re_token_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
120 static reg_errcode_t free_tree (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
121 static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
122 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
123 /* This table gives an error message for each of the error codes listed |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
124 in regex.h. Obviously the order here has to be the same as there. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
125 POSIX doesn't require that we do anything for REG_NOERROR, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
126 but why not be nice? */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
127 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
128 const char __re_error_msgid[] attribute_hidden = |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
129 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
130 #define REG_NOERROR_IDX 0 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
131 gettext_noop ("Success") /* REG_NOERROR */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
132 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
133 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
134 gettext_noop ("No match") /* REG_NOMATCH */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
135 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
136 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
137 gettext_noop ("Invalid regular expression") /* REG_BADPAT */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
138 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
139 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
140 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
141 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
142 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
143 gettext_noop ("Invalid character class name") /* REG_ECTYPE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
144 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
145 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
146 gettext_noop ("Trailing backslash") /* REG_EESCAPE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
147 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
148 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
149 gettext_noop ("Invalid back reference") /* REG_ESUBREG */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
150 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
151 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
152 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
153 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
154 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
155 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
156 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
157 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
158 gettext_noop ("Unmatched \\{") /* REG_EBRACE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
159 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
160 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
161 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
162 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
163 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
164 gettext_noop ("Invalid range end") /* REG_ERANGE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
165 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
166 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
167 gettext_noop ("Memory exhausted") /* REG_ESPACE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
168 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
169 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
170 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
171 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
172 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
173 gettext_noop ("Premature end of regular expression") /* REG_EEND */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
174 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
175 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
176 gettext_noop ("Regular expression too big") /* REG_ESIZE */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
177 "\0" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
178 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
179 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
180 }; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
181 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
182 const size_t __re_error_msgid_idx[] attribute_hidden = |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
183 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
184 REG_NOERROR_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
185 REG_NOMATCH_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
186 REG_BADPAT_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
187 REG_ECOLLATE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
188 REG_ECTYPE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
189 REG_EESCAPE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
190 REG_ESUBREG_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
191 REG_EBRACK_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
192 REG_EPAREN_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
193 REG_EBRACE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
194 REG_BADBR_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
195 REG_ERANGE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
196 REG_ESPACE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
197 REG_BADRPT_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
198 REG_EEND_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
199 REG_ESIZE_IDX, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
200 REG_ERPAREN_IDX |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
201 }; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
202 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
203 /* Entry points for GNU code. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
204 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
205 /* re_compile_pattern is the GNU regular expression compiler: it |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
206 compiles PATTERN (of length LENGTH) and puts the result in BUFP. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
207 Returns 0 if the pattern was valid, otherwise an error string. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
208 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
209 Assumes the `re_allocated' (and perhaps `re_buffer') and `translate' fields |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
210 are set in BUFP on entry. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
211 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
212 const char * |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
213 re_compile_pattern (const char *pattern, size_t length, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
214 struct re_pattern_buffer *bufp) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
215 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
216 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
217 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
218 /* And GNU code determines whether or not to get register information |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
219 by passing null for the REGS argument to re_match, etc., not by |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
220 setting re_no_sub, unless REG_NO_SUB is set. */ |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
221 bufp->re_no_sub = !!(re_syntax_options & REG_NO_SUB); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
222 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
223 /* Match anchors at newline. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
224 bufp->re_newline_anchor = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
225 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
226 ret = re_compile_internal (bufp, pattern, length, re_syntax_options); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
227 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
228 if (!ret) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
229 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
230 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
231 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
232 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
233 weak_alias (__re_compile_pattern, re_compile_pattern) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
234 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
235 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
236 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
237 also be assigned to arbitrarily: each pattern buffer stores its own |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
238 syntax, so it can be changed between regex compilations. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
239 /* This has no initializer because initialized variables in Emacs |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
240 become read-only after dumping. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
241 reg_syntax_t re_syntax_options; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
242 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
243 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
244 /* Specify the precise syntax of regexps for compilation. This provides |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
245 for compatibility for various utilities which historically have |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
246 different, incompatible syntaxes. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
247 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
248 The argument SYNTAX is a bit mask comprised of the various bits |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
249 defined in regex.h. We return the old syntax. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
250 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
251 reg_syntax_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
252 re_set_syntax (reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
253 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
254 reg_syntax_t ret = re_syntax_options; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
255 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
256 re_syntax_options = syntax; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
257 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
258 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
259 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
260 weak_alias (__re_set_syntax, re_set_syntax) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
261 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
262 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
263 int |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
264 re_compile_fastmap (struct re_pattern_buffer *bufp) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
265 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
266 re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
267 char *fastmap = bufp->re_fastmap; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
268 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
269 memset (fastmap, '\0', sizeof (char) * SBC_MAX); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
270 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
271 if (dfa->init_state != dfa->init_state_word) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
272 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
273 if (dfa->init_state != dfa->init_state_nl) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
274 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
275 if (dfa->init_state != dfa->init_state_begbuf) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
276 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
277 bufp->re_fastmap_accurate = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
278 return 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
279 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
280 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
281 weak_alias (__re_compile_fastmap, re_compile_fastmap) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
282 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
283 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
/* Set the FASTMAP entry for CH.  If ICASE is nonzero, additionally set
   the entry for tolower (CH).  */

static inline void
__attribute ((always_inline))
re_set_fastmap (char *fastmap, int icase, int ch)
{
  fastmap[ch] = 1;

  /* Case-sensitive: nothing more to record.  */
  if (!icase)
    return;

  fastmap[tolower (ch)] = 1;
}
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
292 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
293 /* Helper function for re_compile_fastmap. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
294 Compile fastmap for the initial_state INIT_STATE. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
295 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
296 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
297 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
298 char *fastmap) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
299 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
300 re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
301 int node_cnt; |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
302 int icase = (dfa->mb_cur_max == 1 && (bufp->re_syntax & REG_IGNORE_CASE)); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
303 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
304 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
305 int node = init_state->nodes.elems[node_cnt]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
306 re_token_type_t type = dfa->nodes[node].type; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
307 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
308 if (type == CHARACTER) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
309 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
310 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
311 #ifdef RE_ENABLE_I18N |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
312 if ((bufp->re_syntax & REG_IGNORE_CASE) && dfa->mb_cur_max > 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
313 { |
6119
c3bf2ea44695
Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
314 unsigned char buf[MB_LEN_MAX]; |
c3bf2ea44695
Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
315 unsigned char *p; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
316 wchar_t wc; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
317 mbstate_t state; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
318 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
319 p = buf; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
320 *p++ = dfa->nodes[node].opr.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
321 while (++node < dfa->nodes_len |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
322 && dfa->nodes[node].type == CHARACTER |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
323 && dfa->nodes[node].mb_partial) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
324 *p++ = dfa->nodes[node].opr.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
325 memset (&state, 0, sizeof (state)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
326 if (mbrtowc (&wc, (const char *) buf, p - buf, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
327 &state) == p - buf |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
328 && (__wcrtomb ((char *) buf, towlower (wc), &state) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
329 != (size_t) -1)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
330 re_set_fastmap (fastmap, 0, buf[0]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
331 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
332 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
333 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
334 else if (type == SIMPLE_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
335 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
336 int i, j, ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
337 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
338 for (j = 0; j < UINT_BITS; ++j, ++ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
339 if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
340 re_set_fastmap (fastmap, icase, ch); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
341 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
342 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
343 else if (type == COMPLEX_BRACKET) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
344 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
345 int i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
346 re_charset_t *cset = dfa->nodes[node].opr.mbcset; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
347 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
348 || cset->nranges || cset->nchar_classes) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
349 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
350 # ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
351 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
352 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
353 /* In this case we want to catch the bytes which are |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
354 the first byte of any collation elements. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
355 e.g. In da_DK, we want to catch 'a' since "aa" |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
356 is a valid collation element, and don't catch |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
357 'b' since 'b' is the only collation element |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
358 which starts from 'b'. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
359 int j, ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
360 const int32_t *table = (const int32_t *) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
362 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
363 for (j = 0; j < UINT_BITS; ++j, ++ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
364 if (table[ch] < 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
365 re_set_fastmap (fastmap, icase, ch); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
366 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
367 # else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
368 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
369 for (i = 0; i < SBC_MAX; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
370 if (__btowc (i) == WEOF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
371 re_set_fastmap (fastmap, icase, i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
372 # endif /* not _LIBC */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
373 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
374 for (i = 0; i < cset->nmbchars; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
375 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
376 char buf[256]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
377 mbstate_t state; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
378 memset (&state, '\0', sizeof (state)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
379 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
380 re_set_fastmap (fastmap, icase, *(unsigned char *) buf); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
381 if ((bufp->re_syntax & REG_IGNORE_CASE) && dfa->mb_cur_max > 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
382 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
383 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
384 != (size_t) -1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
385 re_set_fastmap (fastmap, 0, *(unsigned char *) buf); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
386 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
387 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
388 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
389 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
390 else if (type == OP_PERIOD |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
391 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
392 || type == OP_UTF8_PERIOD |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
393 #endif /* RE_ENABLE_I18N */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
394 || type == END_OF_RE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
395 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
396 memset (fastmap, '\1', sizeof (char) * SBC_MAX); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
397 if (type == END_OF_RE) |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
398 bufp->re_can_be_null = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
399 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
400 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
401 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
402 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
403 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
404 /* Entry point for POSIX code. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
405 /* regcomp takes a regular expression as a string and compiles it. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
406 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
407 PREG is a regex_t *. We do not expect any fields to be initialized, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
408 since POSIX says we shouldn't. Thus, we set |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
409 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
410 `re_buffer' to the compiled pattern; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
411 `re_used' to the length of the compiled pattern; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
412 `re_syntax' to REG_SYNTAX_POSIX_EXTENDED if the |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
413 REG_EXTENDED bit in CFLAGS is set; otherwise, to |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
414 REG_SYNTAX_POSIX_BASIC; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
415 `re_newline_anchor' to REG_NEWLINE being set in CFLAGS; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
416 `re_fastmap' to an allocated space for the fastmap; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
417 `re_fastmap_accurate' to zero; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
418 `re_nsub' to the number of subexpressions in PATTERN. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
419 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
420 PATTERN is the address of the pattern string. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
421 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
422 CFLAGS is a series of bits which affect compilation. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
423 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
424 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
425 use POSIX basic syntax. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
426 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
427 If REG_NEWLINE is set, then . and [^...] don't match newline. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
428 Also, regexec will try a match beginning after every newline. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
429 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
430 If REG_ICASE is set, then we consider upper- and lowercase |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
431 versions of letters to be equivalent when matching. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
432 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
433 If REG_NOSUB is set, then when PREG is passed to regexec, that |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
434 routine will report only success or failure, and nothing about the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
435 registers. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
436 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
437 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
438 the return codes and their meanings.) */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
439 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
440 int |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
441 regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
442 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
443 reg_errcode_t ret; |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
444 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? REG_SYNTAX_POSIX_EXTENDED |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
445 : REG_SYNTAX_POSIX_BASIC); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
446 |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
447 preg->re_buffer = NULL; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
448 preg->re_allocated = 0; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
449 preg->re_used = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
450 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
451 /* Try to allocate space for the fastmap. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
452 preg->re_fastmap = re_malloc (char, SBC_MAX); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
453 if (BE (preg->re_fastmap == NULL, 0)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
454 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
455 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
456 syntax |= (cflags & REG_ICASE) ? REG_IGNORE_CASE : 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
457 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
458 /* If REG_NEWLINE is set, newlines are treated differently. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
459 if (cflags & REG_NEWLINE) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
460 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
461 syntax &= ~REG_DOT_NEWLINE; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
462 syntax |= REG_HAT_LISTS_NOT_NEWLINE; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
463 /* It also changes the matching behavior. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
464 preg->re_newline_anchor = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
465 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
466 else |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
467 preg->re_newline_anchor = 0; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
468 preg->re_no_sub = !!(cflags & REG_NOSUB); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
469 preg->re_translate = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
470 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
471 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
472 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
473 /* POSIX doesn't distinguish between an unmatched open-group and an |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
474 unmatched close-group: both are REG_EPAREN. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
475 if (ret == REG_ERPAREN) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
476 ret = REG_EPAREN; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
477 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
478 /* We have already checked preg->re_fastmap != NULL. */ |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
479 if (BE (ret == REG_NOERROR, 1)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
480 /* Compute the fastmap now, since regexec cannot modify the pattern |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
481 buffer. This function never fails in this implementation. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
482 (void) re_compile_fastmap (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
483 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
484 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
485 /* Some error occurred while compiling the expression. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
486 re_free (preg->re_fastmap); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
487 preg->re_fastmap = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
488 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
489 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
490 return (int) ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
491 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
492 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
493 weak_alias (__regcomp, regcomp) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
494 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
495 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
496 /* Returns a message corresponding to an error code, ERRCODE, returned |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
497 from either regcomp or regexec. We don't use PREG here. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
498 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
499 size_t |
6104
1e308ce32c4c
* config/srclist.txt: Add glibc bug 1240.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6101
diff
changeset
|
500 regerror (int errcode, const regex_t *__restrict preg, |
1e308ce32c4c
* config/srclist.txt: Add glibc bug 1240.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6101
diff
changeset
|
501 char *__restrict errbuf, size_t errbuf_size) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
502 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
503 const char *msg; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
504 size_t msg_size; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
505 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
506 if (BE (errcode < 0 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
507 || errcode >= (int) (sizeof (__re_error_msgid_idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
508 / sizeof (__re_error_msgid_idx[0])), 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
509 /* Only error codes returned by the rest of the code should be passed |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
510 to this routine. If we are given anything else, or if other regex |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
511 code generates an invalid error code, then the program has a bug. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
512 Dump core so we can fix it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
513 abort (); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
514 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
515 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
516 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
517 msg_size = strlen (msg) + 1; /* Includes the null. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
518 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
519 if (BE (errbuf_size != 0, 1)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
520 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
521 if (BE (msg_size > errbuf_size, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
522 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
523 #if defined HAVE_MEMPCPY || defined _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
524 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
525 #else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
526 memcpy (errbuf, msg, errbuf_size - 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
527 errbuf[errbuf_size - 1] = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
528 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
529 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
530 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
531 memcpy (errbuf, msg, msg_size); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
532 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
533 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
534 return msg_size; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
535 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
536 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
537 weak_alias (__regerror, regerror) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
538 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
539 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
540 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
#ifdef RE_ENABLE_I18N
/* The initializer below is written as four 32-bit words, so refuse to
   build with any other `unsigned int' width.  */
# if UINT_MAX != 0xffffffff
#  error "Add case for new unsigned int size"
# endif

/* Shared, read-only map of single-byte characters for UTF-8.  The map
   is the same for every UTF-8 pattern, so keeping one static copy
   avoids allocating and filling an identical table each time; UTF-8
   is the preferred encoding, which makes this worthwhile.  */
static const bitset utf8_sb_map =
{
  /* Four all-ones words: the first 128 bits are set.  */
  0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
#endif
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
556 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
557 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
558 static void |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
559 free_dfa_content (re_dfa_t *dfa) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
560 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
561 int i, j; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
562 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
563 if (dfa->nodes) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
564 for (i = 0; i < dfa->nodes_len; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
565 free_token (dfa->nodes + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
566 re_free (dfa->nexts); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
567 for (i = 0; i < dfa->nodes_len; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
568 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
569 if (dfa->eclosures != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
570 re_node_set_free (dfa->eclosures + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
571 if (dfa->inveclosures != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
572 re_node_set_free (dfa->inveclosures + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
573 if (dfa->edests != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
574 re_node_set_free (dfa->edests + i); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
575 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
576 re_free (dfa->edests); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
577 re_free (dfa->eclosures); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
578 re_free (dfa->inveclosures); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
579 re_free (dfa->nodes); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
580 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
581 if (dfa->state_table) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
582 for (i = 0; i <= dfa->state_hash_mask; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
583 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
584 struct re_state_table_entry *entry = dfa->state_table + i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
585 for (j = 0; j < entry->num; ++j) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
586 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
587 re_dfastate_t *state = entry->array[j]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
588 free_state (state); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
589 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
590 re_free (entry->array); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
591 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
592 re_free (dfa->state_table); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
593 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
594 if (dfa->sb_char != utf8_sb_map) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
595 re_free (dfa->sb_char); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
596 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
597 re_free (dfa->subexp_map); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
598 #ifdef DEBUG |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
599 re_free (dfa->re_str); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
600 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
601 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
602 re_free (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
603 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
604 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
605 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
606 /* Free dynamically allocated space used by PREG. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
607 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
608 void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
609 regfree (regex_t *preg) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
610 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
611 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
612 if (BE (dfa != NULL, 1)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
613 free_dfa_content (dfa); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
614 preg->re_buffer = NULL; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
615 preg->re_allocated = 0; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
616 |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
617 re_free (preg->re_fastmap); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
618 preg->re_fastmap = NULL; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
619 |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
620 re_free (preg->re_translate); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
621 preg->re_translate = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
622 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
623 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
624 weak_alias (__regfree, regfree) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
625 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
626 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
627 /* Entry points compatible with 4.2 BSD regex library. We don't define |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
628 them unless specifically requested. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
629 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
630 #if defined _REGEX_RE_COMP || defined _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
631 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
632 /* BSD has one and only one pattern buffer. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
633 static struct re_pattern_buffer re_comp_buf; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
634 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
635 char * |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
636 # ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
637 /* Make these definitions weak in libc, so POSIX programs can redefine |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
638 these names if they don't use our functions, and still use |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
639 regcomp/regexec above without link errors. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
640 weak_function |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
641 # endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
642 re_comp (s) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
643 const char *s; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
644 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
645 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
646 char *fastmap; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
647 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
648 if (!s) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
649 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
650 if (!re_comp_buf.re_buffer) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
651 return gettext ("No previous regular expression"); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
652 return 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
653 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
654 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
655 if (re_comp_buf.re_buffer) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
656 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
657 fastmap = re_comp_buf.re_fastmap; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
658 re_comp_buf.re_fastmap = NULL; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
659 __regfree (&re_comp_buf); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
660 memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
661 re_comp_buf.re_fastmap = fastmap; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
662 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
663 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
664 if (re_comp_buf.re_fastmap == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
665 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
666 re_comp_buf.re_fastmap = (char *) malloc (SBC_MAX); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
667 if (re_comp_buf.re_fastmap == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
668 return (char *) gettext (__re_error_msgid |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
669 + __re_error_msgid_idx[(int) REG_ESPACE]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
670 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
671 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
672 /* Since `re_exec' always passes NULL for the `regs' argument, we |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
673 don't need to initialize the pattern buffer fields which affect it. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
674 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
675 /* Match anchors at newlines. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
676 re_comp_buf.re_newline_anchor = 1; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
677 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
678 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
679 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
680 if (!ret) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
681 return NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
682 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
683 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
684 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
685 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
686 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
687 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
688 libc_freeres_fn (free_mem) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
689 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
690 __regfree (&re_comp_buf); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
691 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
692 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
693 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
694 #endif /* _REGEX_RE_COMP */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
695 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
696 /* Internal entry point. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
697 Compile the regular expression PATTERN, whose length is LENGTH. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
698 SYNTAX indicate regular expression's syntax. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
699 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
700 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
701 re_compile_internal (regex_t *preg, const char * pattern, int length, |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
702 reg_syntax_t syntax) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
703 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
704 reg_errcode_t err = REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
705 re_dfa_t *dfa; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
706 re_string_t regexp; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
707 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
708 /* Initialize the pattern buffer. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
709 preg->re_fastmap_accurate = 0; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
710 preg->re_syntax = syntax; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
711 preg->re_not_bol = preg->re_not_eol = 0; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
712 preg->re_used = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
713 preg->re_nsub = 0; |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
714 preg->re_can_be_null = 0; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
715 preg->re_regs_allocated = REG_UNALLOCATED; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
716 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
717 /* Initialize the dfa. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
718 dfa = (re_dfa_t *) preg->re_buffer; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
719 if (BE (preg->re_allocated < sizeof (re_dfa_t), 0)) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
720 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
721 /* If zero allocated, but buffer is non-null, try to realloc |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
722 enough space. This loses if buffer's address is bogus, but |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
723 that is the user's responsibility. If buffer is null this |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
724 is a simple allocation. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
725 dfa = re_realloc (preg->re_buffer, re_dfa_t, 1); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
726 if (dfa == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
727 return REG_ESPACE; |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
728 preg->re_allocated = sizeof (re_dfa_t); |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
729 preg->re_buffer = (unsigned char *) dfa; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
730 } |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
731 preg->re_used = sizeof (re_dfa_t); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
732 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
733 __libc_lock_init (dfa->lock); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
734 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
735 err = init_dfa (dfa, length); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
736 if (BE (err != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
737 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
738 free_dfa_content (dfa); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
739 preg->re_buffer = NULL; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
740 preg->re_allocated = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
741 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
742 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
743 #ifdef DEBUG |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
744 dfa->re_str = re_malloc (char, length + 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
745 strncpy (dfa->re_str, pattern, length + 1); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
746 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
747 |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
748 err = re_string_construct (&regexp, pattern, length, preg->re_translate, |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
749 syntax & REG_IGNORE_CASE, dfa); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
750 if (BE (err != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
751 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
752 re_compile_internal_free_return: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
753 free_workarea_compile (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
754 re_string_destruct (&regexp); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
755 free_dfa_content (dfa); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
756 preg->re_buffer = NULL; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
757 preg->re_allocated = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
758 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
759 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
760 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
761 /* Parse the regular expression, and build a structure tree. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
762 preg->re_nsub = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
763 dfa->str_tree = parse (&regexp, preg, syntax, &err); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
764 if (BE (dfa->str_tree == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
765 goto re_compile_internal_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
766 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
767 /* Analyze the tree and create the nfa. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
768 err = analyze (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
769 if (BE (err != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
770 goto re_compile_internal_free_return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
771 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
772 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
773 /* If possible, do searching in single byte encoding to speed things up. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
774 if (dfa->is_utf8 && !(syntax & REG_IGNORE_CASE) && preg->re_translate == NULL) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
775 optimize_utf8 (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
776 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
777 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
778 /* Then create the initial state of the dfa. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
779 err = create_initial_state (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
780 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
781 /* Release work areas. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
782 free_workarea_compile (preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
783 re_string_destruct (&regexp); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
784 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
785 if (BE (err != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
786 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
787 free_dfa_content (dfa); |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
788 preg->re_buffer = NULL; |
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
789 preg->re_allocated = 0; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
790 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
791 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
792 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
793 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
794 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
795 /* Initialize DFA. We use the length of the regular expression PAT_LEN |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
796 as the initial length of some arrays. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
797 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
798 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
799 init_dfa (re_dfa_t *dfa, int pat_len) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
800 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
801 int table_size; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
802 #ifndef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
803 char *codeset_name; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
804 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
805 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
806 memset (dfa, '\0', sizeof (re_dfa_t)); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
807 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
808 /* Force allocation of str_tree_storage the first time. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
809 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
810 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
811 dfa->nodes_alloc = pat_len + 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
812 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
813 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
814 /* table_size = 2 ^ ceil(log pat_len) */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
815 for (table_size = 1; table_size > 0; table_size <<= 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
816 if (table_size > pat_len) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
817 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
818 |
6119
c3bf2ea44695
Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
819 dfa->state_table = re_calloc (struct re_state_table_entry, table_size); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
820 dfa->state_hash_mask = table_size - 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
821 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
822 dfa->mb_cur_max = MB_CUR_MAX; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
823 #ifdef _LIBC |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
824 if (dfa->mb_cur_max == 6 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
825 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
826 dfa->is_utf8 = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
827 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
828 != 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
829 #else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
830 # ifdef HAVE_LANGINFO_CODESET |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
831 codeset_name = nl_langinfo (CODESET); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
832 # else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
833 codeset_name = getenv ("LC_ALL"); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
834 if (codeset_name == NULL || codeset_name[0] == '\0') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
835 codeset_name = getenv ("LC_CTYPE"); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
836 if (codeset_name == NULL || codeset_name[0] == '\0') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
837 codeset_name = getenv ("LANG"); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
838 if (codeset_name == NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
839 codeset_name = ""; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
840 else if (strchr (codeset_name, '.') != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
841 codeset_name = strchr (codeset_name, '.') + 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
842 # endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
843 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
844 if (strcasecmp (codeset_name, "UTF-8") == 0 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
845 || strcasecmp (codeset_name, "UTF8") == 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
846 dfa->is_utf8 = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
847 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
848 /* We check exhaustively in the loop below if this charset is a |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
849 superset of ASCII. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
850 dfa->map_notascii = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
851 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
852 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
853 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
854 if (dfa->mb_cur_max > 1) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
855 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
856 if (dfa->is_utf8) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
857 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
858 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
859 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
860 int i, j, ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
861 |
6119
c3bf2ea44695
Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
862 dfa->sb_char = re_calloc (unsigned int, BITSET_UINTS); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
863 if (BE (dfa->sb_char == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
864 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
865 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
866 /* Clear all bits by, then set those corresponding to single |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
867 byte chars. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
868 bitset_empty (dfa->sb_char); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
869 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
870 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
871 for (j = 0; j < UINT_BITS; ++j, ++ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
872 { |
5972
aa260da0bbbe
* config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents:
5968
diff
changeset
|
873 wint_t wch = __btowc (ch); |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
874 if (wch != WEOF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
875 dfa->sb_char[i] |= 1 << j; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
876 # ifndef _LIBC |
5972
aa260da0bbbe
* config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents:
5968
diff
changeset
|
877 if (isascii (ch) && wch != ch) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
878 dfa->map_notascii = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
879 # endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
880 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
881 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
882 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
883 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
884 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
885 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
886 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
887 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
888 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
889 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
890 /* Initialize WORD_CHAR table, which indicate which character is |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
891 "word". In this case "word" means that it is the word construction |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
892 character used by some operators like "\<", "\>", etc. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
893 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
894 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
895 init_word_char (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
896 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
897 int i, j, ch; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
898 dfa->word_ops_used = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
899 for (i = 0, ch = 0; i < BITSET_UINTS; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
900 for (j = 0; j < UINT_BITS; ++j, ++ch) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
901 if (isalnum (ch) || ch == '_') |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
902 dfa->word_char[i] |= 1 << j; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
903 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
904 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
905 /* Free the work area which are only used while compiling. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
906 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
907 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
908 free_workarea_compile (regex_t *preg) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
909 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
910 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
911 bin_tree_storage_t *storage, *next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
912 for (storage = dfa->str_tree_storage; storage; storage = next) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
913 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
914 next = storage->next; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
915 re_free (storage); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
916 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
917 dfa->str_tree_storage = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
918 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
919 dfa->str_tree = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
920 re_free (dfa->org_indices); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
921 dfa->org_indices = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
922 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
923 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
924 /* Create initial states for all contexts. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
925 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
926 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
927 create_initial_state (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
928 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
929 int first, i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
930 reg_errcode_t err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
931 re_node_set init_nodes; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
932 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
933 /* Initial states have the epsilon closure of the node which is |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
934 the first node of the regular expression. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
935 first = dfa->str_tree->first->node_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
936 dfa->init_node = first; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
937 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
938 if (BE (err != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
939 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
940 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
941 /* The back-references which are in initial states can epsilon transit, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
942 since in this case all of the subexpressions can be null. |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
943 Then we add epsilon closures of the nodes which are the next nodes of |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
944 the back-references. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
945 if (dfa->nbackref > 0) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
946 for (i = 0; i < init_nodes.nelem; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
947 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
948 int node_idx = init_nodes.elems[i]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
949 re_token_type_t type = dfa->nodes[node_idx].type; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
950 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
951 int clexp_idx; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
952 if (type != OP_BACK_REF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
953 continue; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
954 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
955 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
956 re_token_t *clexp_node; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
957 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
958 if (clexp_node->type == OP_CLOSE_SUBEXP |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
959 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
960 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
961 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
962 if (clexp_idx == init_nodes.nelem) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
963 continue; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
964 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
965 if (type == OP_BACK_REF) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
966 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
967 int dest_idx = dfa->edests[node_idx].elems[0]; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
968 if (!re_node_set_contains (&init_nodes, dest_idx)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
969 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
970 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
971 i = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
972 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
973 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
974 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
975 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
976 /* It must be the first time to invoke acquire_state. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
977 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
978 /* We don't check ERR here, since the initial state must not be NULL. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
979 if (BE (dfa->init_state == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
980 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
981 if (dfa->init_state->has_constraint) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
982 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
983 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
984 CONTEXT_WORD); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
985 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
986 CONTEXT_NEWLINE); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
987 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
988 &init_nodes, |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
989 CONTEXT_NEWLINE |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
990 | CONTEXT_BEGBUF); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
991 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
992 || dfa->init_state_begbuf == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
993 return err; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
994 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
995 else |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
996 dfa->init_state_word = dfa->init_state_nl |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
997 = dfa->init_state_begbuf = dfa->init_state; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
998 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
999 re_node_set_free (&init_nodes); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1000 return REG_NOERROR; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1001 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1002 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1003 #ifdef RE_ENABLE_I18N |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1004 /* If it is possible to do searching in single byte encoding instead of UTF-8 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1005 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1006 DFA nodes where needed. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1007 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1008 static void |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1009 optimize_utf8 (re_dfa_t *dfa) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1010 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1011 int node, i, mb_chars = 0, has_period = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1012 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1013 for (node = 0; node < dfa->nodes_len; ++node) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1014 switch (dfa->nodes[node].type) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1015 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1016 case CHARACTER: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1017 if (dfa->nodes[node].opr.c >= 0x80) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1018 mb_chars = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1019 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1020 case ANCHOR: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1021 switch (dfa->nodes[node].opr.idx) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1022 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1023 case LINE_FIRST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1024 case LINE_LAST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1025 case BUF_FIRST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1026 case BUF_LAST: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1027 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1028 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1029 /* Word anchors etc. cannot be handled. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1030 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1031 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1032 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1033 case OP_PERIOD: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1034 has_period = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1035 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1036 case OP_BACK_REF: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1037 case OP_ALT: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1038 case END_OF_RE: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1039 case OP_DUP_ASTERISK: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1040 case OP_OPEN_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1041 case OP_CLOSE_SUBEXP: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1042 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1043 case COMPLEX_BRACKET: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1044 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1045 case SIMPLE_BRACKET: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1046 /* Just double check. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1047 for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1048 if (dfa->nodes[node].opr.sbcset[i]) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1049 return; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1050 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1051 default: |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1052 abort (); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1053 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1054 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1055 if (mb_chars || has_period) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1056 for (node = 0; node < dfa->nodes_len; ++node) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1057 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1058 if (dfa->nodes[node].type == CHARACTER |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1059 && dfa->nodes[node].opr.c >= 0x80) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1060 dfa->nodes[node].mb_partial = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1061 else if (dfa->nodes[node].type == OP_PERIOD) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1062 dfa->nodes[node].type = OP_UTF8_PERIOD; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1063 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1064 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1065 /* The search can be in single byte locale. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1066 dfa->mb_cur_max = 1; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1067 dfa->is_utf8 = 0; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1068 dfa->has_mb_node = dfa->nbackref > 0 || has_period; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1069 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1070 #endif |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1071 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1072 /* Analyze the structure tree, and calculate "first", "next", "edest", |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1073 "eclosure", and "inveclosure". */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1074 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1075 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1076 analyze (regex_t *preg) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1077 { |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
1078 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1079 reg_errcode_t ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1080 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1081 /* Allocate arrays. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1082 dfa->nexts = re_malloc (int, dfa->nodes_alloc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1083 dfa->org_indices = re_malloc (int, dfa->nodes_alloc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1084 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1085 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1086 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1087 || dfa->eclosures == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1088 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1089 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1090 dfa->subexp_map = re_malloc (int, preg->re_nsub); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1091 if (dfa->subexp_map != NULL) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1092 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1093 int i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1094 for (i = 0; i < preg->re_nsub; i++) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1095 dfa->subexp_map[i] = i; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1096 preorder (dfa->str_tree, optimize_subexps, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1097 for (i = 0; i < preg->re_nsub; i++) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1098 if (dfa->subexp_map[i] != i) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1099 break; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1100 if (i == preg->re_nsub) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1101 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1102 free (dfa->subexp_map); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1103 dfa->subexp_map = NULL; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1104 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1105 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1106 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1107 ret = postorder (dfa->str_tree, lower_subexps, preg); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1108 if (BE (ret != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1109 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1110 ret = postorder (dfa->str_tree, calc_first, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1111 if (BE (ret != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1112 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1113 preorder (dfa->str_tree, calc_next, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1114 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1115 if (BE (ret != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1116 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1117 ret = calc_eclosure (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1118 if (BE (ret != REG_NOERROR, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1119 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1120 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1121 /* We only need this during the prune_impossible_nodes pass in regexec.c; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1122 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ |
6101
c9bcd4ba6b3f
* config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6078
diff
changeset
|
1123 if ((!preg->re_no_sub && preg->re_nsub > 0 && dfa->has_plural_match) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1124 || dfa->nbackref) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1125 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1126 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1127 if (BE (dfa->inveclosures == NULL, 0)) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1128 return REG_ESPACE; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1129 ret = calc_inveclosure (dfa); |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1130 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1131 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1132 return ret; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1133 } |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1134 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1135 /* Our parse trees are very unbalanced, so we cannot use a stack to |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1136 implement parse tree visits. Instead, we use parent pointers and |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1137 some hairy code in these two functions. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1138 static reg_errcode_t |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1139 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6071
diff
changeset
|
1140 void *extra) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1141 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1142 bin_tree_t *node, *prev; |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1143 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1144 for (node = root; ; ) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1145 { |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1146 /* Descend down the tree, preferably to the left (or to the right |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff
changeset
|
1147 if that's the only child). */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
|