annotate lib/regcomp.c @ 12570:3ed9d84fee81

regcomp: recognize ill-formed { } expressions * lib/regcomp.c (parse_dup_op): From glibc: http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a87cd2894cb
author Ulrich Drepper <drepper@redhat.com>
date Mon, 04 Jan 2010 10:51:34 +0100
parents 51ea446bf1f8
children 64d47f001127
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1 /* Extended regular expression matching and search library.
12559
c2cbabec01dd update nearly all FSF copyright year lists to include 2010
Jim Meyering <meyering@redhat.com>
parents: 12351
diff changeset
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
c2cbabec01dd update nearly all FSF copyright year lists to include 2010
Jim Meyering <meyering@redhat.com>
parents: 12351
diff changeset
3 Software Foundation, Inc.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4 This file is part of the GNU C Library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
6
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
7 This program is free software; you can redistribute it and/or modify
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
8 it under the terms of the GNU General Public License as published by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
9 the Free Software Foundation; either version 2, or (at your option)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
10 any later version.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
11
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
12 This program is distributed in the hope that it will be useful,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
15 GNU General Public License for more details.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
16
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
17 You should have received a copy of the GNU General Public License along
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
18 with this program; if not, write to the Free Software Foundation,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
20
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
21 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
22 size_t length, reg_syntax_t syntax);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
23 static void re_compile_fastmap_iter (regex_t *bufp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
24 const re_dfastate_t *init_state,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
25 char *fastmap);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
26 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
27 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
28 static void free_charset (re_charset_t *cset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
29 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
30 static void free_workarea_compile (regex_t *preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
31 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
32 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
33 static void optimize_utf8 (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
34 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
35 static reg_errcode_t analyze (regex_t *preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
36 static reg_errcode_t preorder (bin_tree_t *root,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
37 reg_errcode_t (fn (void *, bin_tree_t *)),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
38 void *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
39 static reg_errcode_t postorder (bin_tree_t *root,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
40 reg_errcode_t (fn (void *, bin_tree_t *)),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
41 void *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
42 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
43 static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
44 static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
45 bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
46 static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
47 static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
48 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
49 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
50 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
51 unsigned int constraint);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
52 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
53 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
54 Idx node, bool root);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
55 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
56 static Idx fetch_number (re_string_t *input, re_token_t *token,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
57 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
58 static int peek_token (re_token_t *token, re_string_t *input,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
59 reg_syntax_t syntax) internal_function;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
60 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
61 reg_syntax_t syntax, reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
62 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
63 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
64 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
65 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
66 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
67 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
68 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
69 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
70 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
71 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
72 re_token_t *token, reg_syntax_t syntax,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
73 Idx nest, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
74 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
75 re_dfa_t *dfa, re_token_t *token,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
76 reg_syntax_t syntax, reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
77 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
78 re_token_t *token, reg_syntax_t syntax,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
79 reg_errcode_t *err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
80 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
81 re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
82 re_token_t *token, int token_len,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
83 re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
84 reg_syntax_t syntax,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
85 bool accept_hyphen);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
86 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
87 re_string_t *regexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
88 re_token_t *token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
89 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
90 static reg_errcode_t build_equiv_class (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
91 re_charset_t *mbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
92 Idx *equiv_class_alloc,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
93 const unsigned char *name);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
94 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
95 bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
96 re_charset_t *mbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
97 Idx *char_class_alloc,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
98 const unsigned char *class_name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
99 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
100 #else /* not RE_ENABLE_I18N */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
101 static reg_errcode_t build_equiv_class (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
102 const unsigned char *name);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
103 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
104 bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
105 const unsigned char *class_name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
106 reg_syntax_t syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
107 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
108 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
109 RE_TRANSLATE_TYPE trans,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
110 const unsigned char *class_name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
111 const unsigned char *extra,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
112 bool non_match, reg_errcode_t *err);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
113 static bin_tree_t *create_tree (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
114 bin_tree_t *left, bin_tree_t *right,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
115 re_token_type_t type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
116 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
117 bin_tree_t *left, bin_tree_t *right,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
118 const re_token_t *token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
119 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
120 static void free_token (re_token_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
121 static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
122 static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
123
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
124 /* This table gives an error message for each of the error codes listed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
125 in regex.h. Obviously the order here has to be same as there.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
126 POSIX doesn't require that we do anything for REG_NOERROR,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
127 but why not be nice? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
128
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
129 static const char __re_error_msgid[] =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
130 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
131 #define REG_NOERROR_IDX 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
132 gettext_noop ("Success") /* REG_NOERROR */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
133 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
134 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
135 gettext_noop ("No match") /* REG_NOMATCH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
136 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
137 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
138 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
139 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
140 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
141 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
142 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
143 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
144 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
145 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
146 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
147 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
148 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
149 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
150 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
151 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
152 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
153 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
154 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
155 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
156 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
157 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
158 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
159 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
160 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
161 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
162 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
163 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
164 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
165 gettext_noop ("Invalid range end") /* REG_ERANGE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
166 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
167 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
168 gettext_noop ("Memory exhausted") /* REG_ESPACE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
169 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
170 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
171 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
172 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
173 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
174 gettext_noop ("Premature end of regular expression") /* REG_EEND */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
175 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
176 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
177 gettext_noop ("Regular expression too big") /* REG_ESIZE */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
178 "\0"
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
179 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
180 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
181 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
182
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
183 static const size_t __re_error_msgid_idx[] =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
184 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
185 REG_NOERROR_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
186 REG_NOMATCH_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
187 REG_BADPAT_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
188 REG_ECOLLATE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
189 REG_ECTYPE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
190 REG_EESCAPE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
191 REG_ESUBREG_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
192 REG_EBRACK_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
193 REG_EPAREN_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
194 REG_EBRACE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
195 REG_BADBR_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
196 REG_ERANGE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
197 REG_ESPACE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
198 REG_BADRPT_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
199 REG_EEND_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
200 REG_ESIZE_IDX,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
201 REG_ERPAREN_IDX
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
202 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
203
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
204 /* Entry points for GNU code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
205
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
206 /* re_compile_pattern is the GNU regular expression compiler: it
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
207 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
208 Returns 0 if the pattern was valid, otherwise an error string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
209
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
210 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
211 are set in BUFP on entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
212
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
213 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
214 const char *
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
215 re_compile_pattern (pattern, length, bufp)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
216 const char *pattern;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
217 size_t length;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
218 struct re_pattern_buffer *bufp;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
219 #else /* size_t might promote */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
220 const char *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
221 re_compile_pattern (const char *pattern, size_t length,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
222 struct re_pattern_buffer *bufp)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
223 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
224 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
225 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
226
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
227 /* And GNU code determines whether or not to get register information
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
228 by passing null for the REGS argument to re_match, etc., not by
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
229 setting no_sub, unless RE_NO_SUB is set. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
230 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
231
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
232 /* Match anchors at newline. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
233 bufp->newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
234
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
235 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
236
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
237 if (!ret)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
238 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
239 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
240 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
241 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
242 weak_alias (__re_compile_pattern, re_compile_pattern)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
243 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
244
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
245 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
246 also be assigned to arbitrarily: each pattern buffer stores its own
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
247 syntax, so it can be changed between regex compilations. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
248 /* This has no initializer because initialized variables in Emacs
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
249 become read-only after dumping. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
250 reg_syntax_t re_syntax_options;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
251
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
252
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
253 /* Specify the precise syntax of regexps for compilation. This provides
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
254 for compatibility for various utilities which historically have
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
255 different, incompatible syntaxes.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
256
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
257 The argument SYNTAX is a bit mask comprised of the various bits
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
258 defined in regex.h. We return the old syntax. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
259
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
260 reg_syntax_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
261 re_set_syntax (syntax)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
262 reg_syntax_t syntax;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
263 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
264 reg_syntax_t ret = re_syntax_options;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
265
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
266 re_syntax_options = syntax;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
267 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
268 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
269 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
270 weak_alias (__re_set_syntax, re_set_syntax)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
271 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
272
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
273 int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
274 re_compile_fastmap (bufp)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
275 struct re_pattern_buffer *bufp;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
276 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
277 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
278 char *fastmap = bufp->fastmap;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
279
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
280 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
281 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
282 if (dfa->init_state != dfa->init_state_word)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
283 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
284 if (dfa->init_state != dfa->init_state_nl)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
285 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
286 if (dfa->init_state != dfa->init_state_begbuf)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
287 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
288 bufp->fastmap_accurate = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
289 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
290 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
291 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
292 weak_alias (__re_compile_fastmap, re_compile_fastmap)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
293 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
294
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
295 static inline void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
296 __attribute ((always_inline))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
297 re_set_fastmap (char *fastmap, bool icase, int ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
298 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
299 fastmap[ch] = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
300 if (icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
301 fastmap[tolower (ch)] = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
302 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
303
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
304 /* Helper function for re_compile_fastmap.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
305 Compile fastmap for the initial_state INIT_STATE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
306
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
307 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
308 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
309 char *fastmap)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
310 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
311 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
312 Idx node_cnt;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
313 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
314 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
315 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
316 Idx node = init_state->nodes.elems[node_cnt];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
317 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
318
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
319 if (type == CHARACTER)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
320 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
321 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
322 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
323 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
324 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6104
diff changeset
325 unsigned char buf[MB_LEN_MAX];
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6104
diff changeset
326 unsigned char *p;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
327 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
328 mbstate_t state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
329
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
330 p = buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
331 *p++ = dfa->nodes[node].opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
332 while (++node < dfa->nodes_len
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
333 && dfa->nodes[node].type == CHARACTER
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
334 && dfa->nodes[node].mb_partial)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
335 *p++ = dfa->nodes[node].opr.c;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
336 memset (&state, '\0', sizeof (state));
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10078
diff changeset
337 if (__mbrtowc (&wc, (const char *) buf, p - buf,
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10078
diff changeset
338 &state) == p - buf
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
339 && (__wcrtomb ((char *) buf, towlower (wc), &state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
340 != (size_t) -1))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
341 re_set_fastmap (fastmap, false, buf[0]);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
342 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
343 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
344 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
345 else if (type == SIMPLE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
346 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
347 int i, ch;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
348 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
349 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
350 int j;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
351 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
352 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
353 if (w & ((bitset_word_t) 1 << j))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
354 re_set_fastmap (fastmap, icase, ch);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
355 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
356 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
357 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
358 else if (type == COMPLEX_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
359 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
360 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
361 Idx i;
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
362
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
363 # ifdef _LIBC
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
364 /* See if we have to try all bytes which start multiple collation
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
365 elements.
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
366 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
367 collation element, and don't catch 'b' since 'b' is
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
368 the only collation element which starts from 'b' (and
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
369 it is caught by SIMPLE_BRACKET). */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
370 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
371 && (cset->ncoll_syms || cset->nranges))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
372 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
373 const int32_t *table = (const int32_t *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
375 for (i = 0; i < SBC_MAX; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
376 if (table[i] < 0)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
377 re_set_fastmap (fastmap, icase, i);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
378 }
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
379 # endif /* _LIBC */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
380
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
381 /* See if we have to start the match at all multibyte characters,
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
382 i.e. where we would not find an invalid sequence. This only
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
383 applies to multibyte character sets; for single byte character
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
384 sets, the SIMPLE_BRACKET again suffices. */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
385 if (dfa->mb_cur_max > 1
12351
0b80ccdc9aa4 regex: Fix fastmap for multibyte character ranges.
Paolo Bonzini <bonzini@gnu.org>
parents: 11000
diff changeset
386 && (cset->nchar_classes || cset->non_match || cset->nranges
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
387 # ifdef _LIBC
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
388 || cset->nequiv_classes
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
389 # endif /* _LIBC */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
390 ))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
391 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
392 unsigned char c = 0;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
393 do
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
394 {
11000
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
395 mbstate_t mbs;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
396 memset (&mbs, 0, sizeof (mbs));
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
397 if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
398 re_set_fastmap (fastmap, false, (int) c);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
399 }
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
400 while (++c != 0);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
401 }
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
402
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
403 else
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
404 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
405 /* ... Else catch all bytes which can start the mbchars. */
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
406 for (i = 0; i < cset->nmbchars; ++i)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
407 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
408 char buf[256];
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
409 mbstate_t state;
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
410 memset (&state, '\0', sizeof (state));
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
411 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
412 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
413 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
414 {
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
415 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
416 != (size_t) -1)
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
417 re_set_fastmap (fastmap, false, *(unsigned char *) buf);
683b379e6760 regex: fix glibc bug 9697
Paolo Bonzini <bonzini@gnu.org>
parents: 10998
diff changeset
418 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
419 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
420 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
421 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
422 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
423 else if (type == OP_PERIOD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
424 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
425 || type == OP_UTF8_PERIOD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
426 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
427 || type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
428 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
429 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
430 if (type == END_OF_RE)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
431 bufp->can_be_null = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
432 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
433 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
434 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
435 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
436
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
437 /* Entry point for POSIX code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
438 /* regcomp takes a regular expression as a string and compiles it.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
439
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
440 PREG is a regex_t *. We do not expect any fields to be initialized,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
441 since POSIX says we shouldn't. Thus, we set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
442
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
443 `buffer' to the compiled pattern;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
444 `used' to the length of the compiled pattern;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
445 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
446 REG_EXTENDED bit in CFLAGS is set; otherwise, to
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
447 RE_SYNTAX_POSIX_BASIC;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
448 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
449 `fastmap' to an allocated space for the fastmap;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
450 `fastmap_accurate' to zero;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
451 `re_nsub' to the number of subexpressions in PATTERN.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
452
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
453 PATTERN is the address of the pattern string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
454
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
455 CFLAGS is a series of bits which affect compilation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
456
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
457 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
458 use POSIX basic syntax.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
459
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
460 If REG_NEWLINE is set, then . and [^...] don't match newline.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
461 Also, regexec will try a match beginning after every newline.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
462
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
463 If REG_ICASE is set, then we considers upper- and lowercase
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
464 versions of letters to be equivalent when matching.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
465
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
466 If REG_NOSUB is set, then when PREG is passed to regexec, that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
467 routine will report only success or failure, and nothing about the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
468 registers.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
469
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
470 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
471 the return codes and their meanings.) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
472
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
473 int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
474 regcomp (preg, pattern, cflags)
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
475 regex_t *_Restrict_ preg;
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
476 const char *_Restrict_ pattern;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
477 int cflags;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
478 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
479 reg_errcode_t ret;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
480 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
481 : RE_SYNTAX_POSIX_BASIC);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
482
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
483 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
484 preg->allocated = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
485 preg->used = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
486
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
487 /* Try to allocate space for the fastmap. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
488 preg->fastmap = re_malloc (char, SBC_MAX);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
489 if (BE (preg->fastmap == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
490 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
491
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
492 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
493
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
494 /* If REG_NEWLINE is set, newlines are treated differently. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
495 if (cflags & REG_NEWLINE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
496 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
497 syntax &= ~RE_DOT_NEWLINE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
498 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
499 /* It also changes the matching behavior. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
500 preg->newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
501 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
502 else
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
503 preg->newline_anchor = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
504 preg->no_sub = !!(cflags & REG_NOSUB);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
505 preg->translate = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
506
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
507 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
508
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
509 /* POSIX doesn't distinguish between an unmatched open-group and an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
510 unmatched close-group: both are REG_EPAREN. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
511 if (ret == REG_ERPAREN)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
512 ret = REG_EPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
513
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
514 /* We have already checked preg->fastmap != NULL. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
515 if (BE (ret == REG_NOERROR, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
516 /* Compute the fastmap now, since regexec cannot modify the pattern
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
517 buffer. This function never fails in this implementation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
518 (void) re_compile_fastmap (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
519 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
520 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
521 /* Some error occurred while compiling the expression. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
522 re_free (preg->fastmap);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
523 preg->fastmap = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
524 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
525
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
526 return (int) ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
527 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
528 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
529 weak_alias (__regcomp, regcomp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
530 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
531
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
532 /* Returns a message corresponding to an error code, ERRCODE, returned
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
533 from either regcomp or regexec. We don't use PREG here. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
534
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
535 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
536 size_t
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
537 regerror (errcode, preg, errbuf, errbuf_size)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
538 int errcode;
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
539 const regex_t *_Restrict_ preg;
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
540 char *_Restrict_ errbuf;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
541 size_t errbuf_size;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
542 #else /* size_t might promote */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
543 size_t
8045
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
544 regerror (int errcode, const regex_t *_Restrict_ preg,
7dcf8a1f2f5e * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents: 7694
diff changeset
545 char *_Restrict_ errbuf, size_t errbuf_size)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
546 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
547 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
548 const char *msg;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
549 size_t msg_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
550
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
551 if (BE (errcode < 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
552 || errcode >= (int) (sizeof (__re_error_msgid_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
553 / sizeof (__re_error_msgid_idx[0])), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
554 /* Only error codes returned by the rest of the code should be passed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
555 to this routine. If we are given anything else, or if other regex
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
556 code generates an invalid error code, then the program has a bug.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
557 Dump core so we can fix it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
558 abort ();
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
559
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
560 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
561
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
562 msg_size = strlen (msg) + 1; /* Includes the null. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
563
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
564 if (BE (errbuf_size != 0, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
565 {
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
566 size_t cpy_size = msg_size;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
567 if (BE (msg_size > errbuf_size, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
568 {
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
569 cpy_size = errbuf_size - 1;
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
570 errbuf[cpy_size] = '\0';
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
571 }
8073
eaa00773406b Avoid mempcpy in the regex code, as the string.h mempcpy stuff
Paul Eggert <eggert@cs.ucla.edu>
parents: 8045
diff changeset
572 memcpy (errbuf, msg, cpy_size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
573 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
574
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
575 return msg_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
576 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
577 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
578 weak_alias (__regerror, regerror)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
579 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
580
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
581
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
582 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
583 /* This static array is used for the map to single-byte characters when
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
584 UTF-8 is used. Otherwise we would allocate memory just to initialize
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
585 it the same all the time. UTF-8 is the preferred encoding so this is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
586 a worthwhile optimization. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
587 static const bitset_t utf8_sb_map =
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
588 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
589 /* Set the first 128 bits. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
590 # if 4 * BITSET_WORD_BITS < ASCII_CHARS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
591 # error "bitset_word_t is narrower than 32 bits"
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
592 # elif 3 * BITSET_WORD_BITS < ASCII_CHARS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
593 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
594 # elif 2 * BITSET_WORD_BITS < ASCII_CHARS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
595 BITSET_WORD_MAX, BITSET_WORD_MAX,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
596 # elif 1 * BITSET_WORD_BITS < ASCII_CHARS
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
597 BITSET_WORD_MAX,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
598 # endif
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
599 (BITSET_WORD_MAX
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
600 >> (SBC_MAX % BITSET_WORD_BITS == 0
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
601 ? 0
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
602 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
603 };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
604 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
605
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
606
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
607 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
608 free_dfa_content (re_dfa_t *dfa)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
609 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
610 Idx i, j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
611
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
612 if (dfa->nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
613 for (i = 0; i < dfa->nodes_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
614 free_token (dfa->nodes + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
615 re_free (dfa->nexts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
616 for (i = 0; i < dfa->nodes_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
617 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
618 if (dfa->eclosures != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
619 re_node_set_free (dfa->eclosures + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
620 if (dfa->inveclosures != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
621 re_node_set_free (dfa->inveclosures + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
622 if (dfa->edests != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
623 re_node_set_free (dfa->edests + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
624 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
625 re_free (dfa->edests);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
626 re_free (dfa->eclosures);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
627 re_free (dfa->inveclosures);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
628 re_free (dfa->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
629
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
630 if (dfa->state_table)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
631 for (i = 0; i <= dfa->state_hash_mask; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
632 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
633 struct re_state_table_entry *entry = dfa->state_table + i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
634 for (j = 0; j < entry->num; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
635 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
636 re_dfastate_t *state = entry->array[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
637 free_state (state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
638 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
639 re_free (entry->array);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
640 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
641 re_free (dfa->state_table);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
642 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
643 if (dfa->sb_char != utf8_sb_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
644 re_free (dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
645 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
646 re_free (dfa->subexp_map);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
647 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
648 re_free (dfa->re_str);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
649 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
650
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
651 re_free (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
652 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
653
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
654
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
655 /* Free dynamically allocated space used by PREG. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
656
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
657 void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
658 regfree (preg)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
659 regex_t *preg;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
660 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
661 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
662 if (BE (dfa != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
663 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
664 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
665 preg->allocated = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
666
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
667 re_free (preg->fastmap);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
668 preg->fastmap = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
669
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
670 re_free (preg->translate);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
671 preg->translate = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
672 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
673 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
674 weak_alias (__regfree, regfree)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
675 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
676
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
677 /* Entry points compatible with 4.2 BSD regex library. We don't define
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
678 them unless specifically requested. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
679
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
680 #if defined _REGEX_RE_COMP || defined _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
681
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
682 /* BSD has one and only one pattern buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
683 static struct re_pattern_buffer re_comp_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
684
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
685 char *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
686 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
687 /* Make these definitions weak in libc, so POSIX programs can redefine
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
688 these names if they don't use our functions, and still use
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
689 regcomp/regexec above without link errors. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
690 weak_function
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
691 # endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
692 re_comp (s)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
693 const char *s;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
694 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
695 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
696 char *fastmap;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
697
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
698 if (!s)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
699 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
700 if (!re_comp_buf.buffer)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
701 return gettext ("No previous regular expression");
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
702 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
703 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
704
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
705 if (re_comp_buf.buffer)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
706 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
707 fastmap = re_comp_buf.fastmap;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
708 re_comp_buf.fastmap = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
709 __regfree (&re_comp_buf);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
710 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
711 re_comp_buf.fastmap = fastmap;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
712 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
713
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
714 if (re_comp_buf.fastmap == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
715 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
716 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
717 if (re_comp_buf.fastmap == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
718 return (char *) gettext (__re_error_msgid
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
719 + __re_error_msgid_idx[(int) REG_ESPACE]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
720 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
721
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
722 /* Since `re_exec' always passes NULL for the `regs' argument, we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
723 don't need to initialize the pattern buffer fields which affect it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
724
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
725 /* Match anchors at newlines. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
726 re_comp_buf.newline_anchor = 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
727
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
728 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
729
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
730 if (!ret)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
731 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
732
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
733 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
734 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
735 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
736
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
737 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
738 libc_freeres_fn (free_mem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
739 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
740 __regfree (&re_comp_buf);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
741 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
742 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
743
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
744 #endif /* _REGEX_RE_COMP */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
745
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
746 /* Internal entry point.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
747 Compile the regular expression PATTERN, whose length is LENGTH.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
748 SYNTAX indicate regular expression's syntax. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
749
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
750 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
751 re_compile_internal (regex_t *preg, const char * pattern, size_t length,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
752 reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
753 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
754 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
755 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
756 re_string_t regexp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
757
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
758 /* Initialize the pattern buffer. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
759 preg->fastmap_accurate = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
760 preg->syntax = syntax;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
761 preg->not_bol = preg->not_eol = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
762 preg->used = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
763 preg->re_nsub = 0;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
764 preg->can_be_null = 0;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
765 preg->regs_allocated = REGS_UNALLOCATED;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
766
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
767 /* Initialize the dfa. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
768 dfa = (re_dfa_t *) preg->buffer;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
769 if (BE (preg->allocated < sizeof (re_dfa_t), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
770 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
771 /* If zero allocated, but buffer is non-null, try to realloc
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
772 enough space. This loses if buffer's address is bogus, but
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
773 that is the user's responsibility. If ->buffer is NULL this
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
774 is a simple allocation. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
775 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
776 if (dfa == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
777 return REG_ESPACE;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
778 preg->allocated = sizeof (re_dfa_t);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
779 preg->buffer = (unsigned char *) dfa;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
780 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
781 preg->used = sizeof (re_dfa_t);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
782
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
783 err = init_dfa (dfa, length);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
784 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
785 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
786 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
787 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
788 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
789 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
790 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
791 #ifdef DEBUG
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
792 /* Note: length+1 will not overflow since it is checked in init_dfa. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
793 dfa->re_str = re_malloc (char, length + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
794 strncpy (dfa->re_str, pattern, length + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
795 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
796
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
797 __libc_lock_init (dfa->lock);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
798
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
799 err = re_string_construct (&regexp, pattern, length, preg->translate,
10078
f47c913858de Fix violation of <stdbool.h> replacement in regex.
Eric Blake <ebb9@byu.net>
parents: 10075
diff changeset
800 (syntax & RE_ICASE) != 0, dfa);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
801 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
802 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
803 re_compile_internal_free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
804 free_workarea_compile (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
805 re_string_destruct (&regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
806 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
807 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
808 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
809 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
810 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
811
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
812 /* Parse the regular expression, and build a structure tree. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
813 preg->re_nsub = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
814 dfa->str_tree = parse (&regexp, preg, syntax, &err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
815 if (BE (dfa->str_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
816 goto re_compile_internal_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
817
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
818 /* Analyze the tree and create the nfa. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
819 err = analyze (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
820 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
821 goto re_compile_internal_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
822
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
823 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
824 /* If possible, do searching in single byte encoding to speed things up. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
825 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
826 optimize_utf8 (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
827 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
828
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
829 /* Then create the initial state of the dfa. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
830 err = create_initial_state (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
831
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
832 /* Release work areas. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
833 free_workarea_compile (preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
834 re_string_destruct (&regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
835
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
836 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
837 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
838 free_dfa_content (dfa);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
839 preg->buffer = NULL;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
840 preg->allocated = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
841 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
842
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
843 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
844 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
845
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
846 /* Initialize DFA. We use the length of the regular expression PAT_LEN
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
847 as the initial length of some arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
848
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
849 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
850 init_dfa (re_dfa_t *dfa, size_t pat_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
851 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
852 __re_size_t table_size;
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
853 #ifndef _LIBC
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
854 char *codeset_name;
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
855 #endif
6733
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
856 #ifdef RE_ENABLE_I18N
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
857 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
858 #else
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
859 size_t max_i18n_object_size = 0;
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
860 #endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
861 size_t max_object_size =
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
862 MAX (sizeof (struct re_state_table_entry),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
863 MAX (sizeof (re_token_t),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
864 MAX (sizeof (re_node_set),
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
865 MAX (sizeof (regmatch_t),
6733
1c9a307d93bd * regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents: 6729
diff changeset
866 max_i18n_object_size))));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
867
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
868 memset (dfa, '\0', sizeof (re_dfa_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
869
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
870 /* Force allocation of str_tree_storage the first time. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
871 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
872
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
873 /* Avoid overflows. The extra "/ 2" is for the table_size doubling
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
874 calculation below, and for similar doubling calculations
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
875 elsewhere. And it's <= rather than <, because some of the
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
876 doubling calculations add 1 afterwards. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
877 if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
878 return REG_ESPACE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
879
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
880 dfa->nodes_alloc = pat_len + 1;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
881 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
882
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
883 /* table_size = 2 ^ ceil(log pat_len) */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
884 for (table_size = 1; ; table_size <<= 1)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
885 if (table_size > pat_len)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
886 break;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
887
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
888 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
889 dfa->state_hash_mask = table_size - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
890
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
891 dfa->mb_cur_max = MB_CUR_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
892 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
893 if (dfa->mb_cur_max == 6
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
894 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
895 dfa->is_utf8 = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
896 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
897 != 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
898 #else
12567
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
899 codeset_name = nl_langinfo (CODESET);
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
900 if (strcasecmp (codeset_name, "UTF-8") == 0
ceb1562f60a5 regcomp: sync from glibc; always use nl_langinfo
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
901 || strcasecmp (codeset_name, "UTF8") == 0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
902 dfa->is_utf8 = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
903
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
904 /* We check exhaustively in the loop below if this charset is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
905 superset of ASCII. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
906 dfa->map_notascii = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
907 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
908
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
909 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
910 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
911 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
912 if (dfa->is_utf8)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
913 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
914 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
915 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
916 int i, j, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
917
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
918 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
919 if (BE (dfa->sb_char == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
920 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
921
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
922 /* Set the bits corresponding to single byte chars. */
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
923 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
924 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
925 {
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
926 wint_t wch = __btowc (ch);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
927 if (wch != WEOF)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
928 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
929 # ifndef _LIBC
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
930 if (isascii (ch) && wch != ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
931 dfa->map_notascii = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
932 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
933 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
934 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
935 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
936 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
937
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
938 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
939 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
940 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
941 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
942
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
943 /* Initialize WORD_CHAR table, which indicate which character is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
944 "word". In this case "word" means that it is the word construction
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
945 character used by some operators like "\<", "\>", etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
946
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
947 static void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
948 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
949 init_word_char (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
950 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
951 int i, j, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
952 dfa->word_ops_used = 1;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
953 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
954 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
955 if (isalnum (ch) || ch == '_')
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
956 dfa->word_char[i] |= (bitset_word_t) 1 << j;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
957 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
958
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
959 /* Free the work area which are only used while compiling. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
960
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
961 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
962 free_workarea_compile (regex_t *preg)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
963 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
964 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
965 bin_tree_storage_t *storage, *next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
966 for (storage = dfa->str_tree_storage; storage; storage = next)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
967 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
968 next = storage->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
969 re_free (storage);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
970 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
971 dfa->str_tree_storage = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
972 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
973 dfa->str_tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
974 re_free (dfa->org_indices);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
975 dfa->org_indices = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
976 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
977
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
978 /* Create initial states for all contexts. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
979
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
980 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
981 create_initial_state (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
982 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
983 Idx first, i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
984 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
985 re_node_set init_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
986
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
987 /* Initial states have the epsilon closure of the node which is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
988 the first node of the regular expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
989 first = dfa->str_tree->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
990 dfa->init_node = first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
991 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
992 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
993 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
994
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
995 /* The back-references which are in initial states can epsilon transit,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
996 since in this case all of the subexpressions can be null.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
997 Then we add epsilon closures of the nodes which are the next nodes of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
998 the back-references. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
999 if (dfa->nbackref > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1000 for (i = 0; i < init_nodes.nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1001 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1002 Idx node_idx = init_nodes.elems[i];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1003 re_token_type_t type = dfa->nodes[node_idx].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1004
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1005 Idx clexp_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1006 if (type != OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1007 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1008 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1009 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1010 re_token_t *clexp_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1011 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1012 if (clexp_node->type == OP_CLOSE_SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1013 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1014 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1015 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1016 if (clexp_idx == init_nodes.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1017 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1018
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1019 if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1020 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1021 Idx dest_idx = dfa->edests[node_idx].elems[0];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1022 if (!re_node_set_contains (&init_nodes, dest_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1023 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1024 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1025 i = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1026 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1027 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1028 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1029
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1030 /* It must be the first time to invoke acquire_state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1031 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1032 /* We don't check ERR here, since the initial state must not be NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1033 if (BE (dfa->init_state == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1034 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1035 if (dfa->init_state->has_constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1036 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1037 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1038 CONTEXT_WORD);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1039 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1040 CONTEXT_NEWLINE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1041 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1042 &init_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1043 CONTEXT_NEWLINE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1044 | CONTEXT_BEGBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1045 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1046 || dfa->init_state_begbuf == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1047 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1048 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1049 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1050 dfa->init_state_word = dfa->init_state_nl
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1051 = dfa->init_state_begbuf = dfa->init_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1052
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1053 re_node_set_free (&init_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1054 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1055 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1056
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1057 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1058 /* If it is possible to do searching in single byte encoding instead of UTF-8
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1059 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1060 DFA nodes where needed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1061
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1062 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1063 optimize_utf8 (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1064 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1065 Idx node;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1066 int i;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1067 bool mb_chars = false;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1068 bool has_period = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1069
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1070 for (node = 0; node < dfa->nodes_len; ++node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1071 switch (dfa->nodes[node].type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1072 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1073 case CHARACTER:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1074 if (dfa->nodes[node].opr.c >= ASCII_CHARS)
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1075 mb_chars = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1076 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1077 case ANCHOR:
9494
7cd817e07a16 Fix a 4-year-old used-uninitialized bug in regcomp.c.
Jim Meyering <meyering@redhat.com>
parents: 8153
diff changeset
1078 switch (dfa->nodes[node].opr.ctx_type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1079 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1080 case LINE_FIRST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1081 case LINE_LAST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1082 case BUF_FIRST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1083 case BUF_LAST:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1084 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1085 default:
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1086 /* Word anchors etc. cannot be handled. It's okay to test
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1087 opr.ctx_type since constraints (for all DFA nodes) are
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1088 created by ORing one or more opr.ctx_type values. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1089 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1090 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1091 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1092 case OP_PERIOD:
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1093 has_period = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1094 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1095 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1096 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1097 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1098 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1099 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1100 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1101 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1102 case COMPLEX_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1103 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1104 case SIMPLE_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1105 /* Just double check. */
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1106 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1107 int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1108 ? 0
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1109 : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1110 for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1111 {
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1112 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1113 return;
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1114 rshift = 0;
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1115 }
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1116 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1117 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1118 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1119 abort ();
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1120 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1121
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1122 if (mb_chars || has_period)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1123 for (node = 0; node < dfa->nodes_len; ++node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1124 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1125 if (dfa->nodes[node].type == CHARACTER
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1126 && dfa->nodes[node].opr.c >= ASCII_CHARS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1127 dfa->nodes[node].mb_partial = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1128 else if (dfa->nodes[node].type == OP_PERIOD)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1129 dfa->nodes[node].type = OP_UTF8_PERIOD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1130 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1131
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1132 /* The search can be in single byte locale. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1133 dfa->mb_cur_max = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1134 dfa->is_utf8 = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1135 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1136 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1137 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1138
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1139 /* Analyze the structure tree, and calculate "first", "next", "edest",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1140 "eclosure", and "inveclosure". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1141
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1142 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1143 analyze (regex_t *preg)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1144 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1145 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1146 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1147
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1148 /* Allocate arrays. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1149 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1150 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1151 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1152 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1153 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1154 || dfa->eclosures == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1155 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1156
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1157 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1158 if (dfa->subexp_map != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1159 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1160 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1161 for (i = 0; i < preg->re_nsub; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1162 dfa->subexp_map[i] = i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1163 preorder (dfa->str_tree, optimize_subexps, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1164 for (i = 0; i < preg->re_nsub; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1165 if (dfa->subexp_map[i] != i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1166 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1167 if (i == preg->re_nsub)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1168 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1169 free (dfa->subexp_map);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1170 dfa->subexp_map = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1171 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1172 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1173
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1174 ret = postorder (dfa->str_tree, lower_subexps, preg);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1175 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1176 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1177 ret = postorder (dfa->str_tree, calc_first, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1178 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1179 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1180 preorder (dfa->str_tree, calc_next, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1181 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1182 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1183 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1184 ret = calc_eclosure (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1185 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1186 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1187
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1188 /* We only need this during the prune_impossible_nodes pass in regexec.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1189 skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1190 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1191 || dfa->nbackref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1192 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1193 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1194 if (BE (dfa->inveclosures == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1195 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1196 ret = calc_inveclosure (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1197 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1198
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1199 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1200 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1201
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1202 /* Our parse trees are very unbalanced, so we cannot use a stack to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1203 implement parse tree visits. Instead, we use parent pointers and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1204 some hairy code in these two functions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1205 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1206 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1207 void *extra)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1208 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1209 bin_tree_t *node, *prev;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1210
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1211 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1212 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1213 /* Descend down the tree, preferably to the left (or to the right
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1214 if that's the only child). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1215 while (node->left || node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1216 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1217 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1218 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1219 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1220
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1221 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1222 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1223 reg_errcode_t err = fn (extra, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1224 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1225 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1226 if (node->parent == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1227 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1228 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1229 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1230 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1231 /* Go up while we have a node that is reached from the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1232 while (node->right == prev || node->right == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1233 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1234 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1235 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1236
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1237 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1238 preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1239 void *extra)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1240 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1241 bin_tree_t *node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1242
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1243 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1244 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1245 reg_errcode_t err = fn (extra, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1246 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1247 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1248
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1249 /* Go to the left node, or up and to the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1250 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1251 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1252 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1253 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1254 bin_tree_t *prev = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1255 while (node->right == prev || node->right == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1256 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1257 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1258 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1259 if (!node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1260 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1261 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1262 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1263 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1264 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1265 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1266
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1267 /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1268 re_search_internal to map the inner one's opr.idx to this one's. Adjust
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1269 backreferences as well. Requires a preorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1270 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1271 optimize_subexps (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1272 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1273 re_dfa_t *dfa = (re_dfa_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1274
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1275 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1276 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1277 int idx = node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1278 node->token.opr.idx = dfa->subexp_map[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1279 dfa->used_bkref_map |= 1 << node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1280 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1281
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1282 else if (node->token.type == SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1283 && node->left && node->left->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1284 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1285 Idx other_idx = node->left->token.opr.idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1286
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1287 node->left = node->left->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1288 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1289 node->left->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1290
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1291 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
1292 if (other_idx < BITSET_WORD_BITS)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1293 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1294 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1295
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1296 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1297 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1298
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1299 /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1300 of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1301 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1302 lower_subexps (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1303 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1304 regex_t *preg = (regex_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1305 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1306
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1307 if (node->left && node->left->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1308 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1309 node->left = lower_subexp (&err, preg, node->left);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1310 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1311 node->left->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1312 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1313 if (node->right && node->right->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1314 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1315 node->right = lower_subexp (&err, preg, node->right);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1316 if (node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1317 node->right->parent = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1318 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1319
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1320 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1321 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1322
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1323 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1324 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1325 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1326 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1327 bin_tree_t *body = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1328 bin_tree_t *op, *cls, *tree1, *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1329
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1330 if (preg->no_sub
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1331 /* We do not optimize empty subexpressions, because otherwise we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1332 have bad CONCAT nodes with NULL children. This is obviously not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1333 very common, so we do not lose much. An example that triggers
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1334 this case is the sed "script" /\(\)/x. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1335 && node->left != NULL
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1336 && (node->token.opr.idx >= BITSET_WORD_BITS
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1337 || !(dfa->used_bkref_map
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1338 & ((bitset_word_t) 1 << node->token.opr.idx))))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1339 return node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1340
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1341 /* Convert the SUBEXP node to the concatenation of an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1342 OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1343 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1344 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1345 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1346 tree = create_tree (dfa, op, tree1, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1347 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1348 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1349 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1350 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1351 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1352
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1353 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1354 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1355 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1356 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1357
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1358 /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1359 nodes. Requires a postorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1360 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1361 calc_first (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1362 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1363 re_dfa_t *dfa = (re_dfa_t *) extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1364 if (node->token.type == CONCAT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1365 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1366 node->first = node->left->first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1367 node->node_idx = node->left->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1368 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1369 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1370 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1371 node->first = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1372 node->node_idx = re_dfa_add_node (dfa, node->token);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1373 if (BE (node->node_idx == REG_MISSING, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1374 return REG_ESPACE;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1375 if (node->token.type == ANCHOR)
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1376 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1377 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1378 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1379 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1380
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1381 /* Pass 2: compute NEXT on the tree. Preorder visit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1382 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1383 calc_next (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1384 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1385 switch (node->token.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1386 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1387 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1388 node->left->next = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1389 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1390 case CONCAT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1391 node->left->next = node->right->first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1392 node->right->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1393 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1394 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1395 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1396 node->left->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1397 if (node->right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1398 node->right->next = node->next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1399 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1400 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1401 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1402 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1403
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1404 /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1405 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1406 link_nfa_nodes (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1407 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1408 re_dfa_t *dfa = (re_dfa_t *) extra;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1409 Idx idx = node->node_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1410 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1411
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1412 switch (node->token.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1413 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1414 case CONCAT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1415 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1416
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1417 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1418 assert (node->next == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1419 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1420
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1421 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1422 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1423 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1424 Idx left, right;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1425 dfa->has_plural_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1426 if (node->left != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1427 left = node->left->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1428 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1429 left = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1430 if (node->right != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1431 right = node->right->first->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1432 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1433 right = node->next->node_idx;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1434 assert (REG_VALID_INDEX (left));
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1435 assert (REG_VALID_INDEX (right));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1436 err = re_node_set_init_2 (dfa->edests + idx, left, right);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1437 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1438 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1439
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1440 case ANCHOR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1441 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1442 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1443 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1444 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1445
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1446 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1447 dfa->nexts[idx] = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1448 if (node->token.type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1449 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1450 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1451
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1452 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1453 assert (!IS_EPSILON_NODE (node->token.type));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1454 dfa->nexts[idx] = node->next->node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1455 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1456 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1457
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1458 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1459 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1460
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1461 /* Duplicate the epsilon closure of the node ROOT_NODE.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1462 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1463 to their own constraint. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1464
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1465 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1466 internal_function
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1467 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1468 Idx root_node, unsigned int init_constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1469 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1470 Idx org_node, clone_node;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1471 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1472 unsigned int constraint = init_constraint;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1473 for (org_node = top_org_node, clone_node = top_clone_node;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1474 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1475 Idx org_dest, clone_dest;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1476 if (dfa->nodes[org_node].type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1477 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1478 /* If the back reference epsilon-transit, its destination must
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1479 also have the constraint. Then duplicate the epsilon closure
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1480 of the destination of the back reference, and store it in
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1481 edests of the back reference. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1482 org_dest = dfa->nexts[org_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1483 re_node_set_empty (dfa->edests + clone_node);
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1484 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1485 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1486 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1487 dfa->nexts[clone_node] = dfa->nexts[org_node];
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1488 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1489 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1490 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1491 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1492 else if (dfa->edests[org_node].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1493 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1494 /* In case of the node can't epsilon-transit, don't duplicate the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1495 destination and store the original destination as the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1496 destination of the node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1497 dfa->nexts[clone_node] = dfa->nexts[org_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1498 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1499 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1500 else if (dfa->edests[org_node].nelem == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1501 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1502 /* In case of the node can epsilon-transit, and it has only one
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1503 destination. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1504 org_dest = dfa->edests[org_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1505 re_node_set_empty (dfa->edests + clone_node);
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1506 /* If the node is root_node itself, it means the epsilon closure
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1507 has a loop. Then tie it to the destination of the root_node. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1508 if (org_node == root_node && clone_node != org_node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1509 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1510 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1511 if (BE (! ok, 0))
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1512 return REG_ESPACE;
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1513 break;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1514 }
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1515 /* In case the node has another constraint, append it. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1516 constraint |= dfa->nodes[org_node].constraint;
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1517 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1518 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1519 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1520 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1521 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1522 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1523 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1524 else /* dfa->edests[org_node].nelem == 2 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1525 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1526 /* In case of the node can epsilon-transit, and it has two
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1527 destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1528 org_dest = dfa->edests[org_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1529 re_node_set_empty (dfa->edests + clone_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1530 /* Search for a duplicated node which satisfies the constraint. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1531 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1532 if (clone_dest == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1533 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1534 /* There is no such duplicated node, create a new one. */
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1535 reg_errcode_t err;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1536 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1537 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1538 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1539 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1540 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1541 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1542 err = duplicate_node_closure (dfa, org_dest, clone_dest,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1543 root_node, constraint);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1544 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1545 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1546 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1547 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1548 {
12569
51ea446bf1f8 regcomp: fix typo in comment
Jim Meyering <meyering@redhat.com>
parents: 12568
diff changeset
1549 /* There is a duplicated node which satisfies the constraint,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1550 use it to avoid infinite loop. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1551 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1552 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1553 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1554 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1555
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1556 org_dest = dfa->edests[org_node].elems[1];
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1557 clone_dest = duplicate_node (dfa, org_dest, constraint);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1558 if (BE (clone_dest == REG_MISSING, 0))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1559 return REG_ESPACE;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1560 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1561 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1562 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1563 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1564 org_node = org_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1565 clone_node = clone_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1566 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1567 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1568 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1569
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1570 /* Search for a node which is duplicated from the node ORG_NODE, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1571 satisfies the constraint CONSTRAINT. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1572
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1573 static Idx
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
1574 search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1575 unsigned int constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1576 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1577 Idx idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1578 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1579 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1580 if (org_node == dfa->org_indices[idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1581 && constraint == dfa->nodes[idx].constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1582 return idx; /* Found. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1583 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1584 return REG_MISSING; /* Not found. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1585 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1586
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1587 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1588 Return the index of the new node, or REG_MISSING if insufficient storage is
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1589 available. */
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1590
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1591 static Idx
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1592 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1593 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1594 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1595 if (BE (dup_idx != REG_MISSING, 1))
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1596 {
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1597 dfa->nodes[dup_idx].constraint = constraint;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1598 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
6071
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1599 dfa->nodes[dup_idx].duplicated = 1;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1600
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1601 /* Store the index of the original node. */
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1602 dfa->org_indices[dup_idx] = org_idx;
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1603 }
c1760162e42f (duplicate_node): Return new index, not an error code,
Paul Eggert <eggert@cs.ucla.edu>
parents: 5972
diff changeset
1604 return dup_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1605 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1606
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1607 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1608 calc_inveclosure (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1609 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1610 Idx src, idx;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1611 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1612 for (idx = 0; idx < dfa->nodes_len; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1613 re_node_set_init_empty (dfa->inveclosures + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1614
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1615 for (src = 0; src < dfa->nodes_len; ++src)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1616 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1617 Idx *elems = dfa->eclosures[src].elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1618 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1619 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1620 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1621 if (BE (! ok, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1622 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1623 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1624 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1625
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1626 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1627 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1628
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1629 /* Calculate "eclosure" for all the node in DFA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1630
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1631 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1632 calc_eclosure (re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1633 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1634 Idx node_idx;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1635 bool incomplete;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1636 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1637 assert (dfa->nodes_len > 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1638 #endif
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1639 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1640 /* For each nodes, calculate epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1641 for (node_idx = 0; ; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1642 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1643 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1644 re_node_set eclosure_elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1645 if (node_idx == dfa->nodes_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1646 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1647 if (!incomplete)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1648 break;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1649 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1650 node_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1651 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1652
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1653 #ifdef DEBUG
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1654 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1655 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1656
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1657 /* If we have already calculated, skip it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1658 if (dfa->eclosures[node_idx].nelem != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1659 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1660 /* Calculate epsilon closure of `node_idx'. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1661 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1662 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1663 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1664
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1665 if (dfa->eclosures[node_idx].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1666 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1667 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1668 re_node_set_free (&eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1669 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1670 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1671 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1672 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1673
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1674 /* Calculate epsilon closure of NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1675
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1676 static reg_errcode_t
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1677 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1678 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1679 reg_errcode_t err;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1680 Idx i;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1681 bool incomplete;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1682 bool ok;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1683 re_node_set eclosure;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1684 incomplete = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1685 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1686 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1687 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1688
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1689 /* This indicates that we are calculating this node now.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1690 We reference this value to avoid infinite loop. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1691 dfa->eclosures[node].nelem = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1692
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1693 /* If the current node has constraints, duplicate all nodes
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1694 since they must inherit the constraints. */
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1695 if (dfa->nodes[node].constraint
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1696 && dfa->edests[node].nelem
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1697 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1698 {
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1699 err = duplicate_node_closure (dfa, node, node, node,
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 9494
diff changeset
1700 dfa->nodes[node].constraint);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1701 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1702 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1703 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1704
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1705 /* Expand each epsilon destination nodes. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1706 if (IS_EPSILON_NODE(dfa->nodes[node].type))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1707 for (i = 0; i < dfa->edests[node].nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1708 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1709 re_node_set eclosure_elem;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1710 Idx edest = dfa->edests[node].elems[i];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1711 /* If calculating the epsilon closure of `edest' is in progress,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1712 return intermediate result. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
1713 if (dfa->eclosures[edest].nelem == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1714 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1715 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1716 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1717 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1718 /* If we haven't calculated the epsilon closure of `edest' yet,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1719 calculate now. Otherwise use calculated epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1720 if (dfa->eclosures[edest].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1721 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1722 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1723 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1724 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1725 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1726 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1727 eclosure_elem = dfa->eclosures[edest];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1728 /* Merge the epsilon closure of `edest'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1729 re_node_set_merge (&eclosure, &eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1730 /* If the epsilon closure of `edest' is incomplete,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1731 the epsilon closure of this node is also incomplete. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1732 if (dfa->eclosures[edest].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1733 {
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1734 incomplete = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1735 re_node_set_free (&eclosure_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1736 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1737 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1738
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1739 /* Epsilon closures include itself. */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1740 ok = re_node_set_insert (&eclosure, node);
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1741 if (BE (! ok, 0))
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
1742 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1743 if (incomplete && !root)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1744 dfa->eclosures[node].nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1745 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1746 dfa->eclosures[node] = eclosure;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1747 *new_set = eclosure;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1748 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1749 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1750
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1751 /* Functions for token which are used in the parser. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1752
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1753 /* Fetch a token from INPUT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1754 We must not use this function inside bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1755
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1756 static void
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1757 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1758 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1759 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1760 re_string_skip_bytes (input, peek_token (result, input, syntax));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1761 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1762
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1763 /* Peek a token from INPUT, and return the length of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1764 We must not use this function inside bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1765
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1766 static int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1767 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
1768 peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1769 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1770 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1771
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1772 if (re_string_eoi (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1773 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1774 token->type = END_OF_RE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1775 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1776 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1777
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1778 c = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1779 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1780
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1781 token->word_char = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1782 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1783 token->mb_partial = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1784 if (input->mb_cur_max > 1 &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1785 !re_string_first_byte (input, re_string_cur_idx (input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1786 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1787 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1788 token->mb_partial = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1789 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1790 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1791 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1792 if (c == '\\')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1793 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1794 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1795 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1796 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1797 token->type = BACK_SLASH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1798 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1799 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1800
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1801 c2 = re_string_peek_byte_case (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1802 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1803 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1804 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1805 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1806 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1807 wint_t wc = re_string_wchar_at (input,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1808 re_string_cur_idx (input) + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1809 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1810 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1811 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1812 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1813 token->word_char = IS_WORD_CHAR (c2) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1814
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1815 switch (c2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1816 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1817 case '|':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1818 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1819 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1820 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1821 case '1': case '2': case '3': case '4': case '5':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1822 case '6': case '7': case '8': case '9':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1823 if (!(syntax & RE_NO_BK_REFS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1824 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1825 token->type = OP_BACK_REF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1826 token->opr.idx = c2 - '1';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1827 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1828 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1829 case '<':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1830 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1831 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1832 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1833 token->opr.ctx_type = WORD_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1834 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1835 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1836 case '>':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1837 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1838 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1839 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1840 token->opr.ctx_type = WORD_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1841 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1842 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1843 case 'b':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1844 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1845 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1846 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1847 token->opr.ctx_type = WORD_DELIM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1848 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1849 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1850 case 'B':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1851 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1852 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1853 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1854 token->opr.ctx_type = NOT_WORD_DELIM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1855 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1856 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1857 case 'w':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1858 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1859 token->type = OP_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1860 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1861 case 'W':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1862 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1863 token->type = OP_NOTWORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1864 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1865 case 's':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1866 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1867 token->type = OP_SPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1868 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1869 case 'S':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1870 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1871 token->type = OP_NOTSPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1872 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1873 case '`':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1874 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1875 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1876 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1877 token->opr.ctx_type = BUF_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1878 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1879 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1880 case '\'':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1881 if (!(syntax & RE_NO_GNU_OPS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1882 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1883 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1884 token->opr.ctx_type = BUF_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1885 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1886 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1887 case '(':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1888 if (!(syntax & RE_NO_BK_PARENS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1889 token->type = OP_OPEN_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1890 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1891 case ')':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1892 if (!(syntax & RE_NO_BK_PARENS))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1893 token->type = OP_CLOSE_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1894 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1895 case '+':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1896 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1897 token->type = OP_DUP_PLUS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1898 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1899 case '?':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1900 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1901 token->type = OP_DUP_QUESTION;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1902 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1903 case '{':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1904 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1905 token->type = OP_OPEN_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1906 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1907 case '}':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1908 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1909 token->type = OP_CLOSE_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1910 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1911 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1912 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1913 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1914 return 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1915 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1916
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1917 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1918 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1919 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1920 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1921 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1922 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1923 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1924 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1925 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1926 token->word_char = IS_WORD_CHAR (token->opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1927
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1928 switch (c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1929 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1930 case '\n':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1931 if (syntax & RE_NEWLINE_ALT)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1932 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1933 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1934 case '|':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1935 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1936 token->type = OP_ALT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1937 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1938 case '*':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1939 token->type = OP_DUP_ASTERISK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1940 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1941 case '+':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1942 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1943 token->type = OP_DUP_PLUS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1944 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1945 case '?':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1946 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1947 token->type = OP_DUP_QUESTION;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1948 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1949 case '{':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1950 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1951 token->type = OP_OPEN_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1952 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1953 case '}':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1954 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1955 token->type = OP_CLOSE_DUP_NUM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1956 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1957 case '(':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1958 if (syntax & RE_NO_BK_PARENS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1959 token->type = OP_OPEN_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1960 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1961 case ')':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1962 if (syntax & RE_NO_BK_PARENS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1963 token->type = OP_CLOSE_SUBEXP;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1964 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1965 case '[':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1966 token->type = OP_OPEN_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1967 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1968 case '.':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1969 token->type = OP_PERIOD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1970 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1971 case '^':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1972 if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1973 re_string_cur_idx (input) != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1974 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1975 char prev = re_string_peek_byte (input, -1);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1976 if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1977 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1978 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1979 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1980 token->opr.ctx_type = LINE_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1981 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1982 case '$':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1983 if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1984 re_string_cur_idx (input) + 1 != re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1985 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1986 re_token_t next;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1987 re_string_skip_bytes (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1988 peek_token (&next, input, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1989 re_string_skip_bytes (input, -1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1990 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1991 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1992 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1993 token->type = ANCHOR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1994 token->opr.ctx_type = LINE_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1995 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1996 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1997 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1998 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1999 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2000 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2001
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2002 /* Peek a token from INPUT, and return the length of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2003 We must not use this function out of bracket expressions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2004
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2005 static int
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2006 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2007 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2008 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2009 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2010 if (re_string_eoi (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2011 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2012 token->type = END_OF_RE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2013 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2014 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2015 c = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2016 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2017
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2018 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2019 if (input->mb_cur_max > 1 &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2020 !re_string_first_byte (input, re_string_cur_idx (input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2021 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2022 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2023 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2024 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2025 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2026
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2027 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2028 && re_string_cur_idx (input) + 1 < re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2029 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2030 /* In this case, '\' escape a character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2031 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2032 re_string_skip_bytes (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2033 c2 = re_string_peek_byte (input, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2034 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2035 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2036 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2037 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2038 if (c == '[') /* '[' is a special char in a bracket exps. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2039 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2040 unsigned char c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2041 int token_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2042 if (re_string_cur_idx (input) + 1 < re_string_length (input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2043 c2 = re_string_peek_byte (input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2044 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2045 c2 = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2046 token->opr.c = c2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2047 token_len = 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2048 switch (c2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2049 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2050 case '.':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2051 token->type = OP_OPEN_COLL_ELEM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2052 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2053 case '=':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2054 token->type = OP_OPEN_EQUIV_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2055 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2056 case ':':
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2057 if (syntax & RE_CHAR_CLASSES)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2058 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2059 token->type = OP_OPEN_CHAR_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2060 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2061 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2062 /* else fall through. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2063 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2064 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2065 token->opr.c = c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2066 token_len = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2067 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2068 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2069 return token_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2070 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2071 switch (c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2072 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2073 case '-':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2074 token->type = OP_CHARSET_RANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2075 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2076 case ']':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2077 token->type = OP_CLOSE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2078 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2079 case '^':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2080 token->type = OP_NON_MATCH_LIST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2081 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2082 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2083 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2084 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2085 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2086 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2087
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2088 /* Functions for parser. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2089
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2090 /* Entry point of the parser.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2091 Parse the regular expression REGEXP and return the structure tree.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2092 If an error is occured, ERR is set by error code, and return NULL.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2093 This function build the following tree, from regular expression <reg_exp>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2094 CAT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2095 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2096 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2097 <reg_exp> EOR
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2098
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2099 CAT means concatenation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2100 EOR means end of regular expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2101
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2102 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2103 parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2104 reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2105 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2106 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2107 bin_tree_t *tree, *eor, *root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2108 re_token_t current_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2109 dfa->syntax = syntax;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2110 fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2111 tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2112 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2113 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2114 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2115 if (tree != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2116 root = create_tree (dfa, tree, eor, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2117 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2118 root = eor;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2119 if (BE (eor == NULL || root == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2120 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2121 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2122 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2123 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2124 return root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2125 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2126
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2127 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2128 <branch1>|<branch2>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2129 ALT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2130 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2131 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2132 <branch1> <branch2>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2133
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2134 ALT means alternative, which represents the operator `|'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2135
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2136 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2137 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2138 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2139 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2140 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2141 bin_tree_t *tree, *branch = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2142 tree = parse_branch (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2143 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2144 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2145
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2146 while (token->type == OP_ALT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2147 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2148 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2149 if (token->type != OP_ALT && token->type != END_OF_RE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2150 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2151 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2152 branch = parse_branch (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2153 if (BE (*err != REG_NOERROR && branch == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2154 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2155 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2156 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2157 branch = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2158 tree = create_tree (dfa, tree, branch, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2159 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2160 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2161 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2162 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2163 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2164 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2165 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2166 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2167
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2168 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2169 <exp1><exp2>:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2170 CAT
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2171 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2172 / \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2173 <exp1> <exp2>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2174
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2175 CAT means concatenation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2176
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2177 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2178 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2179 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2180 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2181 bin_tree_t *tree, *expr;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2182 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2183 tree = parse_expression (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2184 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2185 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2186
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2187 while (token->type != OP_ALT && token->type != END_OF_RE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2188 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2189 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2190 expr = parse_expression (regexp, preg, token, syntax, nest, err);
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2191 if (BE (*err != REG_NOERROR && expr == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2192 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2193 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2194 }
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2195 if (tree != NULL && expr != NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2196 {
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2197 tree = create_tree (dfa, tree, expr, CONCAT);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2198 if (tree == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2199 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2200 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2201 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2202 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2203 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2204 else if (tree == NULL)
7694
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2205 tree = expr;
c818925b8298 * lib/regcomp.c (parse_branch): Rename local, exp->expr, to avoid
Jim Meyering <jim@meyering.net>
parents: 6733
diff changeset
2206 /* Otherwise expr == NULL, we don't need to create new tree. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2207 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2208 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2209 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2210
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2211 /* This function build the following tree, from regular expression a*:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2212 *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2213 |
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2214 a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2215 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2216
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2217 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2218 parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2219 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2220 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2221 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2222 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2223 switch (token->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2224 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2225 case CHARACTER:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2226 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2227 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2228 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2229 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2230 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2231 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2232 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2233 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2234 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2235 while (!re_string_eoi (regexp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2236 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2237 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2238 bin_tree_t *mbc_remain;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2239 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2240 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2241 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2242 if (BE (mbc_remain == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2243 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2244 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2245 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2246 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2247 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2248 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2249 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2250 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2251 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2252 tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2253 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2254 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2255 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2256 case OP_OPEN_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2257 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2258 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2259 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2260 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2261 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2262 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2263 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2264 *err = REG_ESUBREG;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2265 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2266 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2267 dfa->used_bkref_map |= 1 << token->opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2268 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2269 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2270 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2271 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2272 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2273 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2274 ++dfa->nbackref;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2275 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2276 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2277 case OP_OPEN_DUP_NUM:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2278 if (syntax & RE_CONTEXT_INVALID_DUP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2279 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2280 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2281 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2282 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2283 /* FALLTHROUGH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2284 case OP_DUP_ASTERISK:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2285 case OP_DUP_PLUS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2286 case OP_DUP_QUESTION:
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2287 if (syntax & RE_CONTEXT_INVALID_OPS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2288 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2289 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2290 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2291 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2292 else if (syntax & RE_CONTEXT_INDEP_OPS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2293 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2294 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2295 return parse_expression (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2296 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2297 /* else fall through */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2298 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2299 if ((token->type == OP_CLOSE_SUBEXP) &&
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2300 !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2301 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2302 *err = REG_ERPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2303 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2304 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2305 /* else fall through */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2306 case OP_CLOSE_DUP_NUM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2307 /* We treat it as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2308
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2309 /* Then we can these characters as normal characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2310 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2311 /* mb_partial and word_char bits should be initialized already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2312 by peek_token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2313 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2314 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2315 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2316 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2317 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2318 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2319 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2320 case ANCHOR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2321 if ((token->opr.ctx_type
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2322 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2323 && dfa->word_ops_used == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2324 init_word_char (dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2325 if (token->opr.ctx_type == WORD_DELIM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2326 || token->opr.ctx_type == NOT_WORD_DELIM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2327 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2328 bin_tree_t *tree_first, *tree_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2329 if (token->opr.ctx_type == WORD_DELIM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2330 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2331 token->opr.ctx_type = WORD_FIRST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2332 tree_first = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2333 token->opr.ctx_type = WORD_LAST;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2334 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2335 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2336 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2337 token->opr.ctx_type = INSIDE_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2338 tree_first = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2339 token->opr.ctx_type = INSIDE_NOTWORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2340 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2341 tree_last = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2342 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2343 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2344 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2345 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2346 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2347 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2348 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2349 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2350 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2351 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2352 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2353 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2354 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2355 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2356 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2357 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2358 /* We must return here, since ANCHORs can't be followed
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2359 by repetition operators.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2360 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2361 it must not be "<ANCHOR(^)><REPEAT(*)>". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2362 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2363 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2364 case OP_PERIOD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2365 tree = create_token_tree (dfa, NULL, NULL, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2366 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2367 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2368 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2369 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2370 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2371 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2372 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2373 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2374 case OP_WORD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2375 case OP_NOTWORD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2376 tree = build_charclass_op (dfa, regexp->trans,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2377 (const unsigned char *) "alnum",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2378 (const unsigned char *) "_",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2379 token->type == OP_NOTWORD, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2380 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2381 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2382 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2383 case OP_SPACE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2384 case OP_NOTSPACE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2385 tree = build_charclass_op (dfa, regexp->trans,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2386 (const unsigned char *) "space",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2387 (const unsigned char *) "",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2388 token->type == OP_NOTSPACE, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2389 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2390 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2391 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2392 case OP_ALT:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2393 case END_OF_RE:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2394 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2395 case BACK_SLASH:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2396 *err = REG_EESCAPE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2397 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2398 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2399 /* Must not happen? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2400 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2401 assert (0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2402 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2403 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2404 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2405 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2406
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2407 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2408 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2409 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2410 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2411 if (BE (*err != REG_NOERROR && tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2412 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2413 /* In BRE consecutive duplications are not allowed. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2414 if ((syntax & RE_CONTEXT_INVALID_DUP)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2415 && (token->type == OP_DUP_ASTERISK
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2416 || token->type == OP_OPEN_DUP_NUM))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2417 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2418 *err = REG_BADRPT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2419 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2420 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2421 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2422
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2423 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2424 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2425
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2426 /* This function build the following tree, from regular expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2427 (<reg_exp>):
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2428 SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2429 |
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2430 <reg_exp>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2431 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2432
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2433 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2434 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2435 reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2436 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2437 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2438 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2439 size_t cur_nsub;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2440 cur_nsub = preg->re_nsub++;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2441
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2442 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2443
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2444 /* The subexpression may be a null string. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2445 if (token->type == OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2446 tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2447 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2448 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2449 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2450 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2451 *err = REG_EPAREN;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2452 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2453 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2454 }
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2455
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2456 if (cur_nsub <= '9' - '1')
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6170
diff changeset
2457 dfa->completed_bkref_map |= 1 << cur_nsub;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2458
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2459 tree = create_tree (dfa, tree, NULL, SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2460 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2461 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2462 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2463 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2464 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2465 tree->token.opr.idx = cur_nsub;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2466 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2467 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2468
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2469 /* This function parse repetition operators like "*", "+", "{1,3}" etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2470
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2471 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2472 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2473 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2474 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2475 bin_tree_t *tree = NULL, *old_tree = NULL;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2476 Idx i, start, end, start_idx = re_string_cur_idx (regexp);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2477 re_token_t start_token = *token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2478
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2479 if (token->type == OP_OPEN_DUP_NUM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2480 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2481 end = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2482 start = fetch_number (regexp, token, syntax);
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2483 if (start == REG_MISSING)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2484 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2485 if (token->type == CHARACTER && token->opr.c == ',')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2486 start = 0; /* We treat "{,m}" as "{0,m}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2487 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2488 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2489 *err = REG_BADBR; /* <re>{} is invalid. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2490 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2491 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2492 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2493 if (BE (start != REG_ERROR, 1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2494 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2495 /* We treat "{n}" as "{n,n}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2496 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2497 : ((token->type == CHARACTER && token->opr.c == ',')
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2498 ? fetch_number (regexp, token, syntax) : REG_ERROR));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2499 }
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2500 if (BE (start == REG_ERROR || end == REG_ERROR, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2501 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2502 /* Invalid sequence. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2503 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2504 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2505 if (token->type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2506 *err = REG_EBRACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2507 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2508 *err = REG_BADBR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2509
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2510 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2511 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2512
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2513 /* If the syntax bit is set, rollback. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2514 re_string_set_index (regexp, start_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2515 *token = start_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2516 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2517 /* mb_partial and word_char bits should be already initialized by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2518 peek_token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2519 return elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2520 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2521
12570
3ed9d84fee81 regcomp: recognize ill-formed { } expressions
Ulrich Drepper <drepper@redhat.com>
parents: 12569
diff changeset
2522 if (BE ((end != REG_MISSING && start > end)
3ed9d84fee81 regcomp: recognize ill-formed { } expressions
Ulrich Drepper <drepper@redhat.com>
parents: 12569
diff changeset
2523 || token->type != OP_CLOSE_DUP_NUM, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2524 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2525 /* First number greater than second. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2526 *err = REG_BADBR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2527 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2528 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2529 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2530 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2531 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2532 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2533 end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2534 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2535
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2536 fetch_token (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2537
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2538 if (BE (elem == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2539 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2540 if (BE (start == 0 && end == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2541 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2542 postorder (elem, free_tree, NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2543 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2544 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2545
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2546 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2547 if (BE (start > 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2548 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2549 tree = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2550 for (i = 2; i <= start; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2551 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2552 elem = duplicate_tree (elem, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2553 tree = create_tree (dfa, tree, elem, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2554 if (BE (elem == NULL || tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2555 goto parse_dup_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2556 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2557
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2558 if (start == end)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2559 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2560
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2561 /* Duplicate ELEM before it is marked optional. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2562 elem = duplicate_tree (elem, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2563 old_tree = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2564 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2565 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2566 old_tree = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2567
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2568 if (elem->token.type == SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2569 postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2570
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2571 tree = create_tree (dfa, elem, NULL,
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2572 (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2573 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2574 goto parse_dup_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2575
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2576 /* This loop is actually executed only when end != REG_MISSING,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2577 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2578 already created the start+1-th copy. */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2579 if ((Idx) -1 < 0 || end != REG_MISSING)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2580 for (i = start + 2; i <= end; ++i)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2581 {
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2582 elem = duplicate_tree (elem, dfa);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2583 tree = create_tree (dfa, tree, elem, CONCAT);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2584 if (BE (elem == NULL || tree == NULL, 0))
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2585 goto parse_dup_op_espace;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2586
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2587 tree = create_tree (dfa, tree, NULL, OP_ALT);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2588 if (BE (tree == NULL, 0))
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2589 goto parse_dup_op_espace;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2590 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2591
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2592 if (old_tree)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2593 tree = create_tree (dfa, old_tree, tree, CONCAT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2594
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2595 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2596
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2597 parse_dup_op_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2598 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2599 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2600 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2601
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2602 /* Size of the names for collating symbol/equivalence_class/character_class.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2603 I'm not sure, but maybe enough. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2604 #define BRACKET_NAME_BUF_SIZE 32
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2605
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2606 #ifndef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2607 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2608 Build the range expression which starts from START_ELEM, and ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2609 at END_ELEM. The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2610 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2611 mbcset->range_ends, is a pointer argument sinse we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2612 update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2613
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2614 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2615 internal_function
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2616 # ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2617 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2618 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2619 # else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2620 build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2621 bracket_elem_t *end_elem)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2622 # endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2623 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2624 unsigned int start_ch, end_ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2625 /* Equivalence Classes and Character Classes can't be a range start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2626 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2627 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2628 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2629 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2630
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2631 /* We can handle no multi character collating elements without libc
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2632 support. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2633 if (BE ((start_elem->type == COLL_SYM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2634 && strlen ((char *) start_elem->opr.name) > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2635 || (end_elem->type == COLL_SYM
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2636 && strlen ((char *) end_elem->opr.name) > 1), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2637 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2638
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2639 # ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2640 {
5972
aa260da0bbbe * config/srclist.txt: Comment out regcomp.c, since we have a porting fix
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
2641 wchar_t wc;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2642 wint_t start_wc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2643 wint_t end_wc;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2644 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2645
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2646 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2647 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2648 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2649 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2650 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2651 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2652 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2653 ? __btowc (start_ch) : start_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2654 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2655 ? __btowc (end_ch) : end_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2656 if (start_wc == WEOF || end_wc == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2657 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2658 cmp_buf[0] = start_wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2659 cmp_buf[4] = end_wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2660 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2661 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2662
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2663 /* Got valid collation sequence values, add them as a new entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2664 However, for !_LIBC we have no collation elements: if the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2665 character set is single byte, the single byte character set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2666 that we build below suffices. parse_bracket_exp passes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2667 no MBCSET if dfa->mb_cur_max == 1. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2668 if (mbcset)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2669 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2670 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2671 if (BE (*range_alloc == mbcset->nranges, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2672 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2673 /* There is not enough space, need realloc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2674 wchar_t *new_array_start, *new_array_end;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2675 Idx new_nranges;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2676
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2677 /* +1 in case of mbcset->nranges is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2678 new_nranges = 2 * mbcset->nranges + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2679 /* Use realloc since mbcset->range_starts and mbcset->range_ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2680 are NULL if *range_alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2681 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2682 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2683 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2684 new_nranges);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2685
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2686 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2687 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2688
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2689 mbcset->range_starts = new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2690 mbcset->range_ends = new_array_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2691 *range_alloc = new_nranges;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2692 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2693
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2694 mbcset->range_starts[mbcset->nranges] = start_wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2695 mbcset->range_ends[mbcset->nranges++] = end_wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2696 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2697
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2698 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2699 for (wc = 0; wc < SBC_MAX; ++wc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2700 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2701 cmp_buf[2] = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2702 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2703 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2704 bitset_set (sbcset, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2705 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2706 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2707 # else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2708 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2709 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2710 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2711 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2712 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2713 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2714 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2715 : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2716 if (start_ch > end_ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2717 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2718 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2719 for (ch = 0; ch < SBC_MAX; ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2720 if (start_ch <= ch && ch <= end_ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2721 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2722 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2723 # endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2724 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2725 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2726 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2727
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2728 #ifndef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2729 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2730 Build the collating element which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2731 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2732 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2733 pointer argument since we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2734
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2735 static reg_errcode_t
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2736 internal_function
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2737 build_collating_symbol (bitset_t sbcset,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2738 # ifdef RE_ENABLE_I18N
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2739 re_charset_t *mbcset, Idx *coll_sym_alloc,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2740 # endif
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2741 const unsigned char *name)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2742 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2743 size_t name_len = strlen ((const char *) name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2744 if (BE (name_len != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2745 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2746 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2747 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2748 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2749 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2750 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2751 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2752 #endif /* not _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2753
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2754 /* This function parse bracket expression like "[abc]", "[a-c]",
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2755 "[[.a-a.]]" etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2756
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2757 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2758 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
2759 reg_syntax_t syntax, reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2760 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2761 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2762 const unsigned char *collseqmb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2763 const char *collseqwc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2764 uint32_t nrules;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2765 int32_t table_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2766 const int32_t *symb_table;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2767 const unsigned char *extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2768
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2769 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2770 Seek the collating symbol entry correspondings to NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2771 Return the index of the symbol in the SYMB_TABLE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2772
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2773 auto inline int32_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2774 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2775 seek_collating_symbol_entry (name, name_len)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2776 const unsigned char *name;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2777 size_t name_len;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2778 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2779 int32_t hash = elem_hash ((const char *) name, name_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2780 int32_t elem = hash % table_size;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2781 if (symb_table[2 * elem] != 0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2782 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2783 int32_t second = hash % (table_size - 2) + 1;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2784
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2785 do
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2786 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2787 /* First compare the hashing value. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2788 if (symb_table[2 * elem] == hash
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2789 /* Compare the length of the name. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2790 && name_len == extra[symb_table[2 * elem + 1]]
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2791 /* Compare the name. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2792 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2793 name_len) == 0)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2794 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2795 /* Yep, this is the entry. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2796 break;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2797 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2798
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2799 /* Next entry. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2800 elem += second;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2801 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2802 while (symb_table[2 * elem] != 0);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2803 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2804 return elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2805 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2806
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2807 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2808 Look up the collation sequence value of BR_ELEM.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2809 Return the value if succeeded, UINT_MAX otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2810
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2811 auto inline unsigned int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2812 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2813 lookup_collation_sequence_value (br_elem)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2814 bracket_elem_t *br_elem;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2815 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2816 if (br_elem->type == SB_CHAR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2817 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2818 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2819 if (MB_CUR_MAX == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2820 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2821 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2822 return collseqmb[br_elem->opr.ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2823 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2824 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2825 wint_t wc = __btowc (br_elem->opr.ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2826 return __collseq_table_lookup (collseqwc, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2827 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2828 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2829 else if (br_elem->type == MB_CHAR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2830 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2831 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2832 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2833 else if (br_elem->type == COLL_SYM)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2834 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2835 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2836 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2837 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2838 int32_t elem, idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2839 elem = seek_collating_symbol_entry (br_elem->opr.name,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2840 sym_name_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2841 if (symb_table[2 * elem] != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2842 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2843 /* We found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2844 idx = symb_table[2 * elem + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2845 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2846 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2847 /* Skip the byte sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2848 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2849 /* Adjust for the alignment. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2850 idx = (idx + 3) & ~3;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2851 /* Skip the multibyte collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2852 idx += sizeof (unsigned int);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2853 /* Skip the wide char sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2854 idx += sizeof (unsigned int) *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2855 (1 + *(unsigned int *) (extra + idx));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2856 /* Return the collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2857 return *(unsigned int *) (extra + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2858 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2859 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2860 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2861 /* No valid character. Match it as a single byte
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2862 character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2863 return collseqmb[br_elem->opr.name[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2864 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2865 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2866 else if (sym_name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2867 return collseqmb[br_elem->opr.name[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2868 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2869 return UINT_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2870 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2871
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2872 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2873 Build the range expression which starts from START_ELEM, and ends
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2874 at END_ELEM. The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2875 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2876 mbcset->range_ends, is a pointer argument sinse we may
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2877 update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2878
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2879 auto inline reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2880 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2881 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2882 re_charset_t *mbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2883 Idx *range_alloc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2884 bitset_t sbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2885 bracket_elem_t *start_elem, *end_elem;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2886 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2887 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2888 uint32_t start_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2889 uint32_t end_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2890
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2891 /* Equivalence Classes and Character Classes can't be a range
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2892 start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2893 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2894 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2895 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2896 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2897
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2898 start_collseq = lookup_collation_sequence_value (start_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2899 end_collseq = lookup_collation_sequence_value (end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2900 /* Check start/end collation sequence values. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2901 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2902 return REG_ECOLLATE;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2903 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2904 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2905
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2906 /* Got valid collation sequence values, add them as a new entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2907 However, if we have no collation elements, and the character set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2908 is single byte, the single byte character set that we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2909 build below suffices. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2910 if (nrules > 0 || dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2911 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2912 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2913 if (BE (*range_alloc == mbcset->nranges, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2914 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2915 /* There is not enough space, need realloc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2916 uint32_t *new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2917 uint32_t *new_array_end;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
2918 Idx new_nranges;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2919
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2920 /* +1 in case of mbcset->nranges is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2921 new_nranges = 2 * mbcset->nranges + 1;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2922 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2923 new_nranges);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2924 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2925 new_nranges);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2926
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2927 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2928 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2929
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2930 mbcset->range_starts = new_array_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2931 mbcset->range_ends = new_array_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2932 *range_alloc = new_nranges;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2933 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2934
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2935 mbcset->range_starts[mbcset->nranges] = start_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2936 mbcset->range_ends[mbcset->nranges++] = end_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2937 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2938
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2939 /* Build the table for single byte characters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2940 for (ch = 0; ch < SBC_MAX; ch++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2941 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2942 uint32_t ch_collseq;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2943 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2944 if (MB_CUR_MAX == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2945 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2946 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2947 ch_collseq = collseqmb[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2948 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2949 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2950 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2951 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2952 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2953 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2954 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2955
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2956 /* Local function for parse_bracket_exp used in _LIBC environement.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2957 Build the collating element which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2958 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2959 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2960 pointer argument sinse we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2961
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2962 auto inline reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2963 __attribute ((always_inline))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2964 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2965 re_charset_t *mbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2966 Idx *coll_sym_alloc;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2967 bitset_t sbcset;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2968 const unsigned char *name;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2969 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2970 int32_t elem, idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2971 size_t name_len = strlen ((const char *) name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2972 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2973 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2974 elem = seek_collating_symbol_entry (name, name_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2975 if (symb_table[2 * elem] != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2976 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2977 /* We found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2978 idx = symb_table[2 * elem + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2979 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2980 idx += 1 + extra[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2981 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2982 else if (symb_table[2 * elem] == 0 && name_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2983 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2984 /* No valid character, treat it as a normal
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2985 character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2986 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2987 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2988 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2989 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2990 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2991
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2992 /* Got valid collation sequence, add it as a new entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2993 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2994 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2995 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2996 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2997 /* +1 in case of mbcset->ncoll_syms is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
2998 Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2999 /* Use realloc since mbcset->coll_syms is NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3000 if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3001 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3002 new_coll_sym_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3003 if (BE (new_coll_syms == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3004 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3005 mbcset->coll_syms = new_coll_syms;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3006 *coll_sym_alloc = new_coll_sym_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3007 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3008 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3009 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3010 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3011 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3012 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3013 if (BE (name_len != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3014 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3015 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3016 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3017 bitset_set (sbcset, name[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3018 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3019 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3020 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3021 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3022 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3023
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3024 re_token_t br_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3025 re_bitset_ptr_t sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3026 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3027 re_charset_t *mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3028 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3029 Idx equiv_class_alloc = 0, char_class_alloc = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3030 #endif /* not RE_ENABLE_I18N */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3031 bool non_match = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3032 bin_tree_t *work_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3033 int token_len;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3034 bool first_round = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3035 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3036 collseqmb = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3037 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3038 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3039 if (nrules)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3040 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3041 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3042 if (MB_CUR_MAX > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3043 */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3044 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3045 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3046 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3047 _NL_COLLATE_SYMB_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3048 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3049 _NL_COLLATE_SYMB_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3050 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3051 #endif
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3052 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3053 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3054 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3055 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3056 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3057 if (BE (sbcset == NULL || mbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3058 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3059 if (BE (sbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3060 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3061 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3062 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3063 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3064 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3065
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3066 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3067 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3068 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3069 *err = REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3070 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3071 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3072 if (token->type == OP_NON_MATCH_LIST)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3073 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3074 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3075 mbcset->non_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3076 #endif /* not RE_ENABLE_I18N */
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3077 non_match = true;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3078 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
8110
8428e492271f 2007-02-05 Paolo Bonzini <bonzini@gnu.org>
Paolo Bonzini <bonzini@gnu.org>
parents: 8073
diff changeset
3079 bitset_set (sbcset, '\n');
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3080 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3081 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3082 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3083 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3084 *err = REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3085 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3086 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3087 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3088
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3089 /* We treat the first ']' as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3090 if (token->type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3091 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3092
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3093 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3094 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3095 bracket_elem_t start_elem, end_elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3096 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3097 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3098 reg_errcode_t ret;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3099 int token_len2 = 0;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3100 bool is_range_exp = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3101 re_token_t token2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3102
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3103 start_elem.opr.name = start_name_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3104 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3105 syntax, first_round);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3106 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3107 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3108 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3109 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3110 }
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3111 first_round = false;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3112
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3113 /* Get information about the next token. We need it in any case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3114 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3115
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3116 /* Do not check for ranges if we know they are not allowed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3117 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3118 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3119 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3120 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3121 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3122 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3123 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3124 if (token->type == OP_CHARSET_RANGE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3125 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3126 re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3127 token_len2 = peek_token_bracket (&token2, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3128 if (BE (token2.type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3129 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3130 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3131 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3132 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3133 if (token2.type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3134 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3135 /* We treat the last '-' as a normal character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3136 re_string_skip_bytes (regexp, -token_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3137 token->type = CHARACTER;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3138 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3139 else
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3140 is_range_exp = true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3141 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3142 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3143
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3144 if (is_range_exp == true)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3145 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3146 end_elem.opr.name = end_name_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3147 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3148 dfa, syntax, true);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3149 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3150 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3151 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3152 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3153 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3154
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3155 token_len = peek_token_bracket (token, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3156
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3157 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3158 *err = build_range_exp (sbcset, mbcset, &range_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3159 &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3160 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3161 # ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3162 *err = build_range_exp (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3163 dfa->mb_cur_max > 1 ? mbcset : NULL,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3164 &range_alloc, &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3165 # else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3166 *err = build_range_exp (sbcset, &start_elem, &end_elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3167 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3168 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3169 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3170 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3171 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3172 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3173 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3174 switch (start_elem.type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3175 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3176 case SB_CHAR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3177 bitset_set (sbcset, start_elem.opr.ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3178 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3179 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3180 case MB_CHAR:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3181 /* Check whether the array has enough space. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3182 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3183 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3184 wchar_t *new_mbchars;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3185 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3186 /* +1 in case of mbcset->nmbchars is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3187 mbchar_alloc = 2 * mbcset->nmbchars + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3188 /* Use realloc since array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3189 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3190 mbchar_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3191 if (BE (new_mbchars == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3192 goto parse_bracket_exp_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3193 mbcset->mbchars = new_mbchars;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3194 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3195 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3196 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3197 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3198 case EQUIV_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3199 *err = build_equiv_class (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3200 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3201 mbcset, &equiv_class_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3202 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3203 start_elem.opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3204 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3205 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3206 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3207 case COLL_SYM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3208 *err = build_collating_symbol (sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3209 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3210 mbcset, &coll_sym_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3211 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3212 start_elem.opr.name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3213 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3214 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3215 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3216 case CHAR_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3217 *err = build_charclass (regexp->trans, sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3218 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3219 mbcset, &char_class_alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3220 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3221 start_elem.opr.name, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3222 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3223 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3224 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3225 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3226 assert (0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3227 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3228 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3229 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3230 if (BE (token->type == END_OF_RE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3231 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3232 *err = REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3233 goto parse_bracket_exp_free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3234 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3235 if (token->type == OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3236 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3237 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3238
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3239 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3240
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3241 /* If it is non-matching list. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3242 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3243 bitset_not (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3244
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3245 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3246 /* Ensure only single byte characters are set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3247 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3248 bitset_mask (sbcset, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3249
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3250 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3251 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3252 || mbcset->non_match)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3253 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3254 bin_tree_t *mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3255 int sbc_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3256 /* Build a tree for complex bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3257 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3258 br_token.type = COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3259 br_token.opr.mbcset = mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3260 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3261 if (BE (mbc_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3262 goto parse_bracket_exp_espace;
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
3263 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3264 if (sbcset[sbc_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3265 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3266 /* If there are no bits set in sbcset, there is no point
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3267 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
6214
afb93b90dcb8 Change bitset word type from unsigned int to unsigned long int,
Paul Eggert <eggert@cs.ucla.edu>
parents: 6206
diff changeset
3268 if (sbc_idx < BITSET_WORDS)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3269 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3270 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3271 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3272 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3273 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3274 if (BE (work_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3275 goto parse_bracket_exp_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3276
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3277 /* Then join them by ALT node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3278 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3279 if (BE (work_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3280 goto parse_bracket_exp_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3281 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3282 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3283 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3284 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3285 work_tree = mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3286 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3287 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3288 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3289 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3290 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3291 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3292 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3293 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3294 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3295 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3296 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3297 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3298 if (BE (work_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3299 goto parse_bracket_exp_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3300 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3301 return work_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3302
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3303 parse_bracket_exp_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3304 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3305 parse_bracket_exp_free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3306 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3307 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3308 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3309 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3310 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3311 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3312
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3313 /* Parse an element in the bracket expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3314
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3315 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3316 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3317 re_token_t *token, int token_len, re_dfa_t *dfa,
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
3318 reg_syntax_t syntax, bool accept_hyphen)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3319 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3320 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3321 int cur_char_size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3322 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3323 if (cur_char_size > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3324 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3325 elem->type = MB_CHAR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3326 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3327 re_string_skip_bytes (regexp, cur_char_size);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3328 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3329 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3330 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3331 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3332 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3333 || token->type == OP_OPEN_EQUIV_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3334 return parse_bracket_symbol (elem, regexp, token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3335 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3336 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3337 /* A '-' must only appear as anything but a range indicator before
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3338 the closing bracket. Everything else is an error. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3339 re_token_t token2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3340 (void) peek_token_bracket (&token2, regexp, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3341 if (token2.type != OP_CLOSE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3342 /* The actual error value is not standardized since this whole
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3343 case is undefined. But ERANGE makes good sense. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3344 return REG_ERANGE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3345 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3346 elem->type = SB_CHAR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3347 elem->opr.ch = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3348 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3349 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3350
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3351 /* Parse a bracket symbol in the bracket expression. Bracket symbols are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3352 such as [:<character_class>:], [.<collating_element>.], and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3353 [=<equivalent_class>=]. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3354
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3355 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3356 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3357 re_token_t *token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3358 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3359 unsigned char ch, delim = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3360 int i = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3361 if (re_string_eoi(regexp))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3362 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3363 for (;; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3364 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3365 if (i >= BRACKET_NAME_BUF_SIZE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3366 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3367 if (token->type == OP_OPEN_CHAR_CLASS)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3368 ch = re_string_fetch_byte_case (regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3369 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3370 ch = re_string_fetch_byte (regexp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3371 if (re_string_eoi(regexp))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3372 return REG_EBRACK;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3373 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3374 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3375 elem->opr.name[i] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3376 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3377 re_string_skip_bytes (regexp, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3378 elem->opr.name[i] = '\0';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3379 switch (token->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3380 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3381 case OP_OPEN_COLL_ELEM:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3382 elem->type = COLL_SYM;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3383 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3384 case OP_OPEN_EQUIV_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3385 elem->type = EQUIV_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3386 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3387 case OP_OPEN_CHAR_CLASS:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3388 elem->type = CHAR_CLASS;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3389 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3390 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3391 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3392 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3393 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3394 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3395
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3396 /* Helper function for parse_bracket_exp.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3397 Build the equivalence class which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3398 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3399 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3400 is a pointer argument sinse we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3401
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3402 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3403 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3404 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3405 Idx *equiv_class_alloc, const unsigned char *name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3406 #else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3407 build_equiv_class (bitset_t sbcset, const unsigned char *name)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3408 #endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3409 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3410 #ifdef _LIBC
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3411 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3412 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3413 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3414 const int32_t *table, *indirect;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3415 const unsigned char *weights, *extra, *cp;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3416 unsigned char char_buf[2];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3417 int32_t idx1, idx2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3418 unsigned int ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3419 size_t len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3420 /* This #include defines a local function! */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3421 # include <locale/weight.h>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3422 /* Calculate the index for equivalence class. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3423 cp = name;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3424 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3425 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3426 _NL_COLLATE_WEIGHTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3427 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3428 _NL_COLLATE_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3429 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3430 _NL_COLLATE_INDIRECTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3431 idx1 = findidx (&cp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3432 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3433 /* This isn't a valid character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3434 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3435
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3436 /* Build single byte matcing table for this equivalence class. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3437 char_buf[1] = (unsigned char) '\0';
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3438 len = weights[idx1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3439 for (ch = 0; ch < SBC_MAX; ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3440 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3441 char_buf[0] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3442 cp = char_buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3443 idx2 = findidx (&cp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3444 /*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3445 idx2 = table[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3446 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3447 if (idx2 == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3448 /* This isn't a valid character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3449 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3450 if (len == weights[idx2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3451 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3452 int cnt = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3453 while (cnt <= len &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3454 weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3455 ++cnt;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3456
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3457 if (cnt > len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3458 bitset_set (sbcset, ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3459 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3460 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3461 /* Check whether the array has enough space. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3462 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3463 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3464 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3465 /* +1 in case of mbcset->nequiv_classes is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3466 Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3467 /* Use realloc since the array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3468 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3469 int32_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3470 new_equiv_class_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3471 if (BE (new_equiv_classes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3472 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3473 mbcset->equiv_classes = new_equiv_classes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3474 *equiv_class_alloc = new_equiv_class_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3475 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3476 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3477 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3478 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3479 #endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3480 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3481 if (BE (strlen ((const char *) name) != 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3482 return REG_ECOLLATE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3483 bitset_set (sbcset, *name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3484 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3485 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3486 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3487
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3488 /* Helper function for parse_bracket_exp.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3489 Build the character class which is represented by NAME.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3490 The result are written to MBCSET and SBCSET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3491 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3492 is a pointer argument sinse we may update it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3493
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3494 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3495 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3496 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3497 re_charset_t *mbcset, Idx *char_class_alloc,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3498 const unsigned char *class_name, reg_syntax_t syntax)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3499 #else /* not RE_ENABLE_I18N */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3500 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3501 const unsigned char *class_name, reg_syntax_t syntax)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3502 #endif /* not RE_ENABLE_I18N */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3503 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3504 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3505 const char *name = (const char *) class_name;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3506
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3507 /* In case of REG_ICASE "upper" and "lower" match the both of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3508 upper and lower cases. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3509 if ((syntax & RE_ICASE)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3510 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3511 name = "alpha";
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3512
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3513 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3514 /* Check the space of the arrays. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3515 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3516 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3517 /* Not enough, realloc it. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3518 /* +1 in case of mbcset->nchar_classes is 0. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3519 Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3520 /* Use realloc since array is NULL if *alloc == 0. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3521 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3522 new_char_class_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3523 if (BE (new_char_classes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3524 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3525 mbcset->char_classes = new_char_classes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3526 *char_class_alloc = new_char_class_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3527 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3528 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3529 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3530
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3531 #define BUILD_CHARCLASS_LOOP(ctype_func) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3532 do { \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3533 if (BE (trans != NULL, 0)) \
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3534 { \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3535 for (i = 0; i < SBC_MAX; ++i) \
6729
c5495b5c3f32 Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
3536 if (ctype_func (i)) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3537 bitset_set (sbcset, trans[i]); \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3538 } \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3539 else \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3540 { \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3541 for (i = 0; i < SBC_MAX; ++i) \
6729
c5495b5c3f32 Fix space-tab problem. From Jim Meyering.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
3542 if (ctype_func (i)) \
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3543 bitset_set (sbcset, i); \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3544 } \
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3545 } while (0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3546
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3547 if (strcmp (name, "alnum") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3548 BUILD_CHARCLASS_LOOP (isalnum);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3549 else if (strcmp (name, "cntrl") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3550 BUILD_CHARCLASS_LOOP (iscntrl);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3551 else if (strcmp (name, "lower") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3552 BUILD_CHARCLASS_LOOP (islower);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3553 else if (strcmp (name, "space") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3554 BUILD_CHARCLASS_LOOP (isspace);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3555 else if (strcmp (name, "alpha") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3556 BUILD_CHARCLASS_LOOP (isalpha);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3557 else if (strcmp (name, "digit") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3558 BUILD_CHARCLASS_LOOP (isdigit);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3559 else if (strcmp (name, "print") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3560 BUILD_CHARCLASS_LOOP (isprint);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3561 else if (strcmp (name, "upper") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3562 BUILD_CHARCLASS_LOOP (isupper);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3563 else if (strcmp (name, "blank") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3564 BUILD_CHARCLASS_LOOP (isblank);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3565 else if (strcmp (name, "graph") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3566 BUILD_CHARCLASS_LOOP (isgraph);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3567 else if (strcmp (name, "punct") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3568 BUILD_CHARCLASS_LOOP (ispunct);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3569 else if (strcmp (name, "xdigit") == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3570 BUILD_CHARCLASS_LOOP (isxdigit);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3571 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3572 return REG_ECTYPE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3573
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3574 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3575 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3576
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3577 static bin_tree_t *
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3578 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3579 const unsigned char *class_name,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3580 const unsigned char *extra, bool non_match,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3581 reg_errcode_t *err)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3582 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3583 re_bitset_ptr_t sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3584 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3585 re_charset_t *mbcset;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3586 Idx alloc = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3587 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3588 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3589 re_token_t br_token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3590 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3591
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3592 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3593 #ifdef RE_ENABLE_I18N
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3594 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3595 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3596
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3597 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3598 if (BE (sbcset == NULL || mbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3599 #else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3600 if (BE (sbcset == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3601 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3602 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3603 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3604 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3605 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3606
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3607 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3608 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3609 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3610 mbcset->non_match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3611 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3612 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3613
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3614 /* We don't care the syntax in this case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3615 ret = build_charclass (trans, sbcset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3616 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3617 mbcset, &alloc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3618 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3619 class_name, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3621 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3622 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3623 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3624 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3625 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3626 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3627 *err = ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3628 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3629 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3630 /* \w match '_' also. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3631 for (; *extra; extra++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3632 bitset_set (sbcset, *extra);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3633
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3634 /* If it is non-matching list. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3635 if (non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3636 bitset_not (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3637
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3638 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3639 /* Ensure only single byte characters are set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3640 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3641 bitset_mask (sbcset, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3642 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3643
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3644 /* Build a tree for simple bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3645 br_token.type = SIMPLE_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3646 br_token.opr.sbcset = sbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3647 tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3648 if (BE (tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3649 goto build_word_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3650
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3651 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3652 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3653 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3654 bin_tree_t *mbc_tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3655 /* Build a tree for complex bracket. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3656 br_token.type = COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3657 br_token.opr.mbcset = mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3658 dfa->has_mb_node = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3659 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3660 if (BE (mbc_tree == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3661 goto build_word_op_espace;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3662 /* Then join them by ALT node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3663 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3664 if (BE (mbc_tree != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3665 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3666 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3667 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3668 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3669 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3670 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3671 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3672 #else /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3673 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3674 #endif /* not RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3675
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3676 build_word_op_espace:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3677 re_free (sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3678 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3679 free_charset (mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3680 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3681 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3682 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3683 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3684
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3685 /* This is intended for the expressions like "a{1,3}".
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3686 Fetch a number from `input', and return the number.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3687 Return REG_MISSING if the number field is empty like "{,1}".
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3688 Return REG_ERROR if an error occurred. */
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3689
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3690 static Idx
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3691 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3692 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3693 Idx num = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3694 unsigned char c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3695 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3696 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3697 fetch_token (token, input, syntax);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3698 c = token->opr.c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3699 if (BE (token->type == END_OF_RE, 0))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3700 return REG_ERROR;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3701 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3702 break;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3703 num = ((token->type != CHARACTER || c < '0' || '9' < c
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3704 || num == REG_ERROR)
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3705 ? REG_ERROR
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3706 : ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0'));
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
3707 num = (num > RE_DUP_MAX) ? REG_ERROR : num;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3708 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3709 return num;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3710 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3711
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3712 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3713 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3714 free_charset (re_charset_t *cset)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3715 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3716 re_free (cset->mbchars);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3717 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3718 re_free (cset->coll_syms);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3719 re_free (cset->equiv_classes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3720 re_free (cset->range_starts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3721 re_free (cset->range_ends);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3722 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3723 re_free (cset->char_classes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3724 re_free (cset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3725 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3726 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3727
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3728 /* Functions for binary tree operation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3729
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3730 /* Create a tree node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3731
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3732 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3733 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3734 re_token_type_t type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3735 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3736 re_token_t t;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3737 t.type = type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3738 return create_token_tree (dfa, left, right, &t);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3739 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3740
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3741 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3742 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3743 const re_token_t *token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3744 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3745 bin_tree_t *tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3746 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3747 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3748 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3749
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3750 if (storage == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3751 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3752 storage->next = dfa->str_tree_storage;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3753 dfa->str_tree_storage = storage;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3754 dfa->str_tree_storage_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3755 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3756 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3757
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3758 tree->parent = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3759 tree->left = left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3760 tree->right = right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3761 tree->token = *token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3762 tree->token.duplicated = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3763 tree->token.opt_subexp = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3764 tree->first = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3765 tree->next = NULL;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3766 tree->node_idx = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3767
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3768 if (left != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3769 left->parent = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3770 if (right != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3771 right->parent = tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3772 return tree;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3773 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3774
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3775 /* Mark the tree SRC as an optional subexpression.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3776 To be called from preorder or postorder. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3777
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3778 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3779 mark_opt_subexp (void *extra, bin_tree_t *node)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3780 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6174
diff changeset
3781 Idx idx = (Idx) (long) extra;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3782 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3783 node->token.opt_subexp = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3784
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3785 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3786 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3787
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3788 /* Free the allocated memory inside NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3789
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3790 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3791 free_token (re_token_t *node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3792 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3793 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3794 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3795 free_charset (node->opr.mbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3796 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3797 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3798 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3799 re_free (node->opr.sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3800 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3801
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3802 /* Worker function for tree walking. Free the allocated memory inside NODE
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3803 and its children. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3804
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3805 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3806 free_tree (void *extra, bin_tree_t *node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3807 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3808 free_token (&node->token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3809 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3810 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3811
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3812
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3813 /* Duplicate the node SRC, and return new node. This is a preorder
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3814 visit similar to the one implemented by the generic visitor, but
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3815 we need more infrastructure to maintain two parallel trees --- so,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3816 it's easier to duplicate. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3817
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3818 static bin_tree_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6071
diff changeset
3819 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3820 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3821 const bin_tree_t *node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3822 bin_tree_t *dup_root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3823 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3824
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3825 for (node = root; ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3826 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3827 /* Create a new tree and link it back to the current parent. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3828 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3829 if (*p_new == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3830 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3831 (*p_new)->parent = dup_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3832 (*p_new)->token.duplicated = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3833 dup_node = *p_new;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3834
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3835 /* Go to the left node, or up and to the right. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3836 if (node->left)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3837 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3838 node = node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3839 p_new = &dup_node->left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3840 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3841 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3842 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3843 const bin_tree_t *prev = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3844 while (node->right == prev || node->right == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3845 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3846 prev = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3847 node = node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3848 dup_node = dup_node->parent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3849 if (!node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3850 return dup_root;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3851 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3852 node = node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3853 p_new = &dup_node->right;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3854 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3855 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3856 }