annotate lib/regex_internal.c @ 16511:2074f2bf7216

math code: Add comments. * lib/acosl.c: Add comment about related glibc source files. * lib/asinl.c: Likewise. * lib/atanl.c: Likewise. * lib/expl.c: Likewise. * lib/logl.c: Likewise. * lib/sincosl.c: Likewise. * lib/sinl.c: Likewise. * lib/tanl.c: Likewise. * lib/trigl.c: Likewise. * lib/cosl.c: Likewise. Fix comments.
author Bruno Haible <bruno@clisp.org>
date Wed, 29 Feb 2012 12:30:07 +0100
parents 53295e65981f
children 56ba760172b4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1 /* Extended regular expression matching and search library.
16201
8250f2777afc maint: update all copyright year number ranges
Jim Meyering <meyering@redhat.com>
parents: 14079
diff changeset
2 Copyright (C) 2002-2012 Free Software Foundation, Inc.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3 This file is part of the GNU C Library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
5
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
6 This program is free software; you can redistribute it and/or modify
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
8 the Free Software Foundation; either version 2, or (at your option)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
9 any later version.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
10
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
11 This program is distributed in the hope that it will be useful,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
14 GNU General Public License for more details.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
15
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
16 You should have received a copy of the GNU General Public License along
16366
bb182ee4a09d maint: replace FSF snail-mail addresses with URLs
Paul Eggert <eggert@cs.ucla.edu>
parents: 16361
diff changeset
17 with this program; if not, see <http://www.gnu.org/licenses/>. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
18
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
19 static void re_string_construct_common (const char *str, Idx len,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
20 re_string_t *pstr,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
21 RE_TRANSLATE_TYPE trans, bool icase,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
22 const re_dfa_t *dfa) internal_function;
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
23 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
24 const re_node_set *nodes,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
25 re_hashval_t hash) internal_function;
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
26 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
27 const re_node_set *nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
28 unsigned int context,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
29 re_hashval_t hash) internal_function;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
30
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
31 /* Functions for string operation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
32
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
33 /* This function allocate the buffers. It is necessary to call
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
34 re_string_reconstruct before using the object. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
35
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
36 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
37 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
38 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
39 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
40 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
41 reg_errcode_t ret;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
42 Idx init_buf_len;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
43
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
44 /* Ensure at least one character fits into the buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
45 if (init_len < dfa->mb_cur_max)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
46 init_len = dfa->mb_cur_max;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
47 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
48 re_string_construct_common (str, len, pstr, trans, icase, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
49
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
50 ret = re_string_realloc_buffers (pstr, init_buf_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
51 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
52 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
53
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
54 pstr->word_char = dfa->word_char;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
55 pstr->word_ops_used = dfa->word_ops_used;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
56 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
57 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
58 pstr->valid_raw_len = pstr->valid_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
59 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
60 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
61
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
62 /* This function allocate the buffers, and initialize them. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
63
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
64 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
65 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
66 re_string_construct (re_string_t *pstr, const char *str, Idx len,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
67 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
68 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
69 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
70 memset (pstr, '\0', sizeof (re_string_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
71 re_string_construct_common (str, len, pstr, trans, icase, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
72
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
73 if (len > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
74 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
75 ret = re_string_realloc_buffers (pstr, len + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
76 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
77 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
78 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
79 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
80
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
81 if (icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
82 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
83 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
84 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
85 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
86 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
87 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
88 ret = build_wcs_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
89 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
90 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
91 if (pstr->valid_raw_len >= len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
92 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
93 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
94 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
95 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
96 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
97 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
98 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
99 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
100 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
101 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
102 build_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
103 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
104 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
105 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
106 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
107 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
108 build_wcs_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
109 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
110 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
111 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
112 if (trans != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
113 re_string_translate_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
114 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
115 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
116 pstr->valid_len = pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
117 pstr->valid_raw_len = pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
118 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
119 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
120 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
121
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
122 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
123 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
124
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
125 /* Helper functions for re_string_allocate, and re_string_construct. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
126
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
127 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
128 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
129 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
130 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
131 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
132 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
133 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
134 wint_t *new_wcs;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
135
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
136 /* Avoid overflow in realloc. */
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
137 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
138 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
139 return REG_ESPACE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
140
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
141 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
142 if (BE (new_wcs == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
143 return REG_ESPACE;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
144 pstr->wcs = new_wcs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
145 if (pstr->offsets != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
146 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
147 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
148 if (BE (new_offsets == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
149 return REG_ESPACE;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
150 pstr->offsets = new_offsets;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
151 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
152 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
153 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
154 if (pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
155 {
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
156 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
157 new_buf_len);
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
158 if (BE (new_mbs == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
159 return REG_ESPACE;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
160 pstr->mbs = new_mbs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
161 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
162 pstr->bufs_len = new_buf_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
163 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
164 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
165
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
166
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
167 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
168 internal_function
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
169 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
170 RE_TRANSLATE_TYPE trans, bool icase,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
171 const re_dfa_t *dfa)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
172 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
173 pstr->raw_mbs = (const unsigned char *) str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
174 pstr->len = len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
175 pstr->raw_len = len;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
176 pstr->trans = trans;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
177 pstr->icase = icase;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
178 pstr->mbs_allocated = (trans != NULL || icase);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
179 pstr->mb_cur_max = dfa->mb_cur_max;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
180 pstr->is_utf8 = dfa->is_utf8;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
181 pstr->map_notascii = dfa->map_notascii;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
182 pstr->stop = pstr->len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
183 pstr->raw_stop = pstr->stop;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
184 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
185
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
186 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
187
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
188 /* Build wide character buffer PSTR->WCS.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
189 If the byte sequence of the string are:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
190 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
191 Then wide character buffer will be:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
192 <wc1> , WEOF , <wc2> , WEOF , <wc3>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
193 We use WEOF for padding, they indicate that the position isn't
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
194 a first byte of a multibyte character.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
195
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
196 Note that this function assumes PSTR->VALID_LEN elements are already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
197 built and starts from PSTR->VALID_LEN. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
198
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
199 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
200 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
201 build_wcs_buffer (re_string_t *pstr)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
202 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
203 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
204 unsigned char buf[MB_LEN_MAX];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
205 assert (MB_LEN_MAX >= pstr->mb_cur_max);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
206 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
207 unsigned char buf[64];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
208 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
209 mbstate_t prev_st;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
210 Idx byte_idx, end_idx, remain_len;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
211 size_t mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
212
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
213 /* Build the buffers from pstr->valid_len to either pstr->len or
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
214 pstr->bufs_len. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
215 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
216 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
217 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
218 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
219 const char *p;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
220
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
221 remain_len = end_idx - byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
222 prev_st = pstr->cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
223 /* Apply the translation if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
224 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
225 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
226 int i, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
227
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
228 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
229 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
230 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
231 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
232 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
233 p = (const char *) buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
234 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
235 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
236 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
237 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
238 if (BE (mbclen == (size_t) -1 || mbclen == 0
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
239 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
240 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
241 /* We treat these cases as a singlebyte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
242 mbclen = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
243 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
244 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
245 wc = pstr->trans[wc];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
246 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
247 }
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
248 else if (BE (mbclen == (size_t) -2, 0))
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
249 {
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
250 /* The buffer doesn't have enough space, finish to build. */
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
251 pstr->cur_state = prev_st;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
252 break;
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
253 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
254
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
255 /* Write wide character and padding. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
256 pstr->wcs[byte_idx++] = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
257 /* Write paddings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
258 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
259 pstr->wcs[byte_idx++] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
260 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
261 pstr->valid_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
262 pstr->valid_raw_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
263 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
264
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
265 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
266 but for REG_ICASE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
267
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
268 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
269 internal_function __attribute_warn_unused_result__
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
270 build_wcs_upper_buffer (re_string_t *pstr)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
271 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
272 mbstate_t prev_st;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
273 Idx src_idx, byte_idx, end_idx, remain_len;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
274 size_t mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
275 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
276 char buf[MB_LEN_MAX];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
277 assert (MB_LEN_MAX >= pstr->mb_cur_max);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
278 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
279 char buf[64];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
280 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
281
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
282 byte_idx = pstr->valid_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
283 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
284
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
285 /* The following optimization assumes that ASCII characters can be
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
286 mapped to wide characters with a simple cast. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
287 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
288 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
289 while (byte_idx < end_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
290 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
291 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
292
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
293 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
294 && mbsinit (&pstr->cur_state))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
295 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
296 /* In case of a singlebyte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
297 pstr->mbs[byte_idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
298 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
299 /* The next step uses the assumption that wchar_t is encoded
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
300 ASCII-safe: all ASCII values can be converted like this. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
301 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
302 ++byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
303 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
304 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
305
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
306 remain_len = end_idx - byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
307 prev_st = pstr->cur_state;
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
308 mbclen = __mbrtowc (&wc,
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
309 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
310 + byte_idx), remain_len, &pstr->cur_state);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
311 if (BE (mbclen < (size_t) -2, 1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
312 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
313 wchar_t wcu = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
314 if (iswlower (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
315 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
316 size_t mbcdlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
317
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
318 wcu = towupper (wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
319 mbcdlen = wcrtomb (buf, wcu, &prev_st);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
320 if (BE (mbclen == mbcdlen, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
321 memcpy (pstr->mbs + byte_idx, buf, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
322 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
323 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
324 src_idx = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
325 goto offsets_needed;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
326 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
327 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
328 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
329 memcpy (pstr->mbs + byte_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
330 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
331 pstr->wcs[byte_idx++] = wcu;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
332 /* Write paddings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
333 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
334 pstr->wcs[byte_idx++] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
335 }
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
336 else if (mbclen == (size_t) -1 || mbclen == 0
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
337 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
338 {
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
339 /* It is an invalid character, an incomplete character
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
340 at the end of the string, or '\0'. Just use the byte. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
341 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
342 pstr->mbs[byte_idx] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
343 /* And also cast it to wide char. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
344 pstr->wcs[byte_idx++] = (wchar_t) ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
345 if (BE (mbclen == (size_t) -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
346 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
347 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
348 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
349 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
350 /* The buffer doesn't have enough space, finish to build. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
351 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
352 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
353 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
354 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
355 pstr->valid_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
356 pstr->valid_raw_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
357 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
358 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
359 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
360 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
361 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
362 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
363 const char *p;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
364 offsets_needed:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
365 remain_len = end_idx - byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
366 prev_st = pstr->cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
367 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
368 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
369 int i, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
370
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
371 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
372 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
373 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
374 buf[i] = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
375 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
376 p = (const char *) buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
377 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
378 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
379 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
380 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
381 if (BE (mbclen < (size_t) -2, 1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
382 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
383 wchar_t wcu = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
384 if (iswlower (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
385 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
386 size_t mbcdlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
387
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
388 wcu = towupper (wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
389 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
390 if (BE (mbclen == mbcdlen, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
391 memcpy (pstr->mbs + byte_idx, buf, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
392 else if (mbcdlen != (size_t) -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
393 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
394 size_t i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
395
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
396 if (byte_idx + mbcdlen > pstr->bufs_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
397 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
398 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
399 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
400 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
401
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
402 if (pstr->offsets == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
403 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
404 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
405
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
406 if (pstr->offsets == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
407 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
408 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
409 if (!pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
410 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
411 for (i = 0; i < (size_t) byte_idx; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
412 pstr->offsets[i] = i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
413 pstr->offsets_needed = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
414 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
415
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
416 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
417 pstr->wcs[byte_idx] = wcu;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
418 pstr->offsets[byte_idx] = src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
419 for (i = 1; i < mbcdlen; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
420 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
421 pstr->offsets[byte_idx + i]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
422 = src_idx + (i < mbclen ? i : mbclen - 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
423 pstr->wcs[byte_idx + i] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
424 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
425 pstr->len += mbcdlen - mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
426 if (pstr->raw_stop > src_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
427 pstr->stop += mbcdlen - mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
428 end_idx = (pstr->bufs_len > pstr->len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
429 ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
430 byte_idx += mbcdlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
431 src_idx += mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
432 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
433 }
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
434 else
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
435 memcpy (pstr->mbs + byte_idx, p, mbclen);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
436 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
437 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
438 memcpy (pstr->mbs + byte_idx, p, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
439
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
440 if (BE (pstr->offsets_needed != 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
441 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
442 size_t i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
443 for (i = 0; i < mbclen; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
444 pstr->offsets[byte_idx + i] = src_idx + i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
445 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
446 src_idx += mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
447
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
448 pstr->wcs[byte_idx++] = wcu;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
449 /* Write paddings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
450 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
451 pstr->wcs[byte_idx++] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
452 }
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
453 else if (mbclen == (size_t) -1 || mbclen == 0
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
454 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
455 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
456 /* It is an invalid character or '\0'. Just use the byte. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
457 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
458
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
459 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
460 ch = pstr->trans [ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
461 pstr->mbs[byte_idx] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
462
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
463 if (BE (pstr->offsets_needed != 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
464 pstr->offsets[byte_idx] = src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
465 ++src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
466
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
467 /* And also cast it to wide char. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
468 pstr->wcs[byte_idx++] = (wchar_t) ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
469 if (BE (mbclen == (size_t) -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
470 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
471 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
472 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
473 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
474 /* The buffer doesn't have enough space, finish to build. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
475 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
476 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
477 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
478 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
479 pstr->valid_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
480 pstr->valid_raw_len = src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
481 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
482 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
483
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
484 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
485 Return the index. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
486
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
487 static Idx
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
488 internal_function
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
489 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
490 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
491 mbstate_t prev_st;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
492 Idx rawbuf_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
493 size_t mbclen;
7124
a61f08bc4d0f * regex_internal.c (re_string_skip_chars): Don't assume WEOF fits
Paul Eggert <eggert@cs.ucla.edu>
parents: 7110
diff changeset
494 wint_t wc = WEOF;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
495
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
496 /* Skip the characters which are not necessary to check. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
497 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
498 rawbuf_idx < new_raw_idx;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
499 {
7124
a61f08bc4d0f * regex_internal.c (re_string_skip_chars): Don't assume WEOF fits
Paul Eggert <eggert@cs.ucla.edu>
parents: 7110
diff changeset
500 wchar_t wc2;
16387
53295e65981f regex: fix false multibyte matches in some regular expressions
Stanislav Brabec <sbrabec@suse.cz>
parents: 16366
diff changeset
501 Idx remain_len = pstr->raw_len - rawbuf_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
502 prev_st = pstr->cur_state;
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
503 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
504 remain_len, &pstr->cur_state);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
505 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
506 {
7110
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
507 /* We treat these cases as a single byte character. */
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
508 if (mbclen == 0 || remain_len == 0)
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
509 wc = L'\0';
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
510 else
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
511 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
512 mbclen = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
513 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
514 }
7124
a61f08bc4d0f * regex_internal.c (re_string_skip_chars): Don't assume WEOF fits
Paul Eggert <eggert@cs.ucla.edu>
parents: 7110
diff changeset
515 else
a61f08bc4d0f * regex_internal.c (re_string_skip_chars): Don't assume WEOF fits
Paul Eggert <eggert@cs.ucla.edu>
parents: 7110
diff changeset
516 wc = wc2;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
517 /* Then proceed the next character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
518 rawbuf_idx += mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
519 }
7124
a61f08bc4d0f * regex_internal.c (re_string_skip_chars): Don't assume WEOF fits
Paul Eggert <eggert@cs.ucla.edu>
parents: 7110
diff changeset
520 *last_wc = wc;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
521 return rawbuf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
522 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
523 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
524
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
525 /* Build the buffer PSTR->MBS, and apply the translation if we need.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
526 This function is used in case of REG_ICASE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
527
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
528 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
529 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
530 build_upper_buffer (re_string_t *pstr)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
531 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
532 Idx char_idx, end_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
533 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
534
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
535 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
536 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
537 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
538 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
539 ch = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
540 if (islower (ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
541 pstr->mbs[char_idx] = toupper (ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
542 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
543 pstr->mbs[char_idx] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
544 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
545 pstr->valid_len = char_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
546 pstr->valid_raw_len = char_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
547 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
548
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
549 /* Apply TRANS to the buffer in PSTR. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
550
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
551 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
552 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
553 re_string_translate_buffer (re_string_t *pstr)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
554 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
555 Idx buf_idx, end_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
556 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
557
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
558 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
559 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
560 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
561 pstr->mbs[buf_idx] = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
562 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
563
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
564 pstr->valid_len = buf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
565 pstr->valid_raw_len = buf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
566 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
567
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
568 /* This function re-construct the buffers.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
569 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
570 convert to upper case in case of REG_ICASE, apply translation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
571
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
572 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
573 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
574 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
575 {
6186
7a0537a5ad1b * lib/regex_internal.c (re_string_reconstruct): Don't assume buffer
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
576 Idx offset;
7a0537a5ad1b * lib/regex_internal.c (re_string_reconstruct): Don't assume buffer
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
577
7a0537a5ad1b * lib/regex_internal.c (re_string_reconstruct): Don't assume buffer
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
578 if (BE (pstr->raw_mbs_idx <= idx, 0))
7a0537a5ad1b * lib/regex_internal.c (re_string_reconstruct): Don't assume buffer
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
579 offset = idx - pstr->raw_mbs_idx;
7a0537a5ad1b * lib/regex_internal.c (re_string_reconstruct): Don't assume buffer
Paul Eggert <eggert@cs.ucla.edu>
parents: 6185
diff changeset
580 else
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
581 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
582 /* Reset buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
583 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
584 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
585 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
586 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
587 pstr->len = pstr->raw_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
588 pstr->stop = pstr->raw_stop;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
589 pstr->valid_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
590 pstr->raw_mbs_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
591 pstr->valid_raw_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
592 pstr->offsets_needed = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
593 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
594 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
595 if (!pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
596 pstr->mbs = (unsigned char *) pstr->raw_mbs;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
597 offset = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
598 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
599
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
600 if (BE (offset != 0, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
601 {
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
602 /* Should the already checked characters be kept? */
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
603 if (BE (offset < pstr->valid_raw_len, 1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
604 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
605 /* Yes, move them to the front of the buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
606 #ifdef RE_ENABLE_I18N
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
607 if (BE (pstr->offsets_needed, 0))
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
608 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
609 Idx low = 0, high = pstr->valid_len, mid;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
610 do
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
611 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
612 mid = (high + low) / 2;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
613 if (pstr->offsets[mid] > offset)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
614 high = mid;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
615 else if (pstr->offsets[mid] < offset)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
616 low = mid + 1;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
617 else
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
618 break;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
619 }
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
620 while (low < high);
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
621 if (pstr->offsets[mid] < offset)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
622 ++mid;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
623 pstr->tip_context = re_string_context_at (pstr, mid - 1,
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
624 eflags);
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
625 /* This can be quite complicated, so handle specially
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
626 only the common and easy case where the character with
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
627 different length representation of lower and upper
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
628 case is present at or after offset. */
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
629 if (pstr->valid_len > offset
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
630 && mid == offset && pstr->offsets[mid] == offset)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
631 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
632 memmove (pstr->wcs, pstr->wcs + offset,
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
633 (pstr->valid_len - offset) * sizeof (wint_t));
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
634 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
635 pstr->valid_len -= offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
636 pstr->valid_raw_len -= offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
637 for (low = 0; low < pstr->valid_len; low++)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
638 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
639 }
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
640 else
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
641 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
642 /* Otherwise, just find out how long the partial multibyte
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
643 character at offset is and fill it with WEOF/255. */
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
644 pstr->len = pstr->raw_len - idx + offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
645 pstr->stop = pstr->raw_stop - idx + offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
646 pstr->offsets_needed = 0;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
647 while (mid > 0 && pstr->offsets[mid - 1] == offset)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
648 --mid;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
649 while (mid < pstr->valid_len)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
650 if (pstr->wcs[mid] != WEOF)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
651 break;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
652 else
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
653 ++mid;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
654 if (mid == pstr->valid_len)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
655 pstr->valid_len = 0;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
656 else
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
657 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
658 pstr->valid_len = pstr->offsets[mid] - offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
659 if (pstr->valid_len)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
660 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
661 for (low = 0; low < pstr->valid_len; ++low)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
662 pstr->wcs[low] = WEOF;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
663 memset (pstr->mbs, 255, pstr->valid_len);
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
664 }
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
665 }
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
666 pstr->valid_raw_len = pstr->valid_len;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
667 }
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
668 }
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
669 else
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
670 #endif
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
671 {
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
672 pstr->tip_context = re_string_context_at (pstr, offset - 1,
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
673 eflags);
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
674 #ifdef RE_ENABLE_I18N
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
675 if (pstr->mb_cur_max > 1)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
676 memmove (pstr->wcs, pstr->wcs + offset,
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
677 (pstr->valid_len - offset) * sizeof (wint_t));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
678 #endif /* RE_ENABLE_I18N */
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
679 if (BE (pstr->mbs_allocated, 0))
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
680 memmove (pstr->mbs, pstr->mbs + offset,
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
681 pstr->valid_len - offset);
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
682 pstr->valid_len -= offset;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
683 pstr->valid_raw_len -= offset;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
684 #if DEBUG
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
685 assert (pstr->valid_len > 0);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
686 #endif
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
687 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
688 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
689 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
690 {
10955
5c229a7fba2a Avoid gcc warnings on cygwin.
Eric Blake <ebb9@byu.net>
parents: 10075
diff changeset
691 #ifdef RE_ENABLE_I18N
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
692 /* No, skip all characters until IDX. */
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
693 Idx prev_valid_len = pstr->valid_len;
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
694
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
695 if (BE (pstr->offsets_needed, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
696 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
697 pstr->len = pstr->raw_len - idx + offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
698 pstr->stop = pstr->raw_stop - idx + offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
699 pstr->offsets_needed = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
700 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
701 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
702 pstr->valid_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
703 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
704 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
705 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
706 Idx wcs_idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
707 wint_t wc = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
708
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
709 if (pstr->is_utf8)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
710 {
7976
caf7dcb02c3d * lib/regex_internal.c (re_string_reconstruct): Remove declaration
Jim Meyering <jim@meyering.net>
parents: 7344
diff changeset
711 const unsigned char *raw, *p, *end;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
712
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
713 /* Special case UTF-8. Multi-byte chars start with any
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
714 byte other than 0x80 - 0xbf. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
715 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
716 end = raw + (offset - pstr->mb_cur_max);
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
717 if (end < pstr->raw_mbs)
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
718 end = pstr->raw_mbs;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
719 p = raw + offset - 1;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
720 #ifdef _LIBC
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
721 /* We know the wchar_t encoding is UCS4, so for the simple
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
722 case, ASCII characters, skip the conversion step. */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
723 if (isascii (*p) && BE (pstr->trans == NULL, 1))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
724 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
725 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
726 /* pstr->valid_len = 0; */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
727 wc = (wchar_t) *p;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
728 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
729 else
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
730 #endif
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
731 for (; p >= end; --p)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
732 if ((*p & 0xc0) != 0x80)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
733 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
734 mbstate_t cur_state;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
735 wchar_t wc2;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
736 Idx mlen = raw + pstr->len - p;
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
737 unsigned char buf[6];
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
738 size_t mbclen;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
739
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
740 const unsigned char *pp = p;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
741 if (BE (pstr->trans != NULL, 0))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
742 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
743 int i = mlen < 6 ? mlen : 6;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
744 while (--i >= 0)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
745 buf[i] = pstr->trans[p[i]];
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
746 pp = buf;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
747 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
748 /* XXX Don't use mbrtowc, we know which conversion
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
749 to use (UTF-8 -> UCS4). */
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
750 memset (&cur_state, 0, sizeof (cur_state));
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
751 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
10998
cc7a1af3872f regex: replace mbrtowc with __mbrtowc.
Paolo Bonzini <bonzini@gnu.org>
parents: 10955
diff changeset
752 &cur_state);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
753 if (raw + offset - p <= mbclen
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
754 && mbclen < (size_t) -2)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
755 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
756 memset (&pstr->cur_state, '\0',
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
757 sizeof (mbstate_t));
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
758 pstr->valid_len = mbclen - (raw + offset - p);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
759 wc = wc2;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
760 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
761 break;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
762 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
763 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
764
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
765 if (wc == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
766 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
7110
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
767 if (wc == WEOF)
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
768 pstr->tip_context
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
769 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
7110
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
770 else
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
771 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
772 && IS_WIDE_WORD_CHAR (wc))
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
773 ? CONTEXT_WORD
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
774 : ((IS_WIDE_NEWLINE (wc)
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
775 && pstr->newline_anchor)
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
776 ? CONTEXT_NEWLINE : 0));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
777 if (BE (pstr->valid_len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
778 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
779 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
780 pstr->wcs[wcs_idx] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
781 if (pstr->mbs_allocated)
7344
72030a247f35 * lib/regex_internal.c (re_string_reconstruct): Handle
Paul Eggert <eggert@cs.ucla.edu>
parents: 7307
diff changeset
782 memset (pstr->mbs, 255, pstr->valid_len);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
783 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
784 pstr->valid_raw_len = pstr->valid_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
785 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
786 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
787 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
788 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
789 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
7110
f4969cab8e44 Import the following changes from libc:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6726
diff changeset
790 pstr->valid_raw_len = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
791 if (pstr->trans)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
792 c = pstr->trans[c];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
793 pstr->tip_context = (bitset_contain (pstr->word_char, c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
794 ? CONTEXT_WORD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
795 : ((IS_NEWLINE (c) && pstr->newline_anchor)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
796 ? CONTEXT_NEWLINE : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
797 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
798 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
799 if (!BE (pstr->mbs_allocated, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
800 pstr->mbs += offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
801 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
802 pstr->raw_mbs_idx = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
803 pstr->len -= offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
804 pstr->stop -= offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
805
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
806 /* Then build the buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
807 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
808 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
809 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
810 if (pstr->icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
811 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
812 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
813 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
814 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
815 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
816 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
817 build_wcs_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
818 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
819 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
820 #endif /* RE_ENABLE_I18N */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
821 if (BE (pstr->mbs_allocated, 0))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
822 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
823 if (pstr->icase)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
824 build_upper_buffer (pstr);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
825 else if (pstr->trans != NULL)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
826 re_string_translate_buffer (pstr);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
827 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
828 else
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
829 pstr->valid_len = pstr->len;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
830
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
831 pstr->cur_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
832 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
833 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
834
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
835 static unsigned char
6129
2fce0cb93f59 * config/srclist.text: Add glibc bug 1248.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
836 internal_function __attribute ((pure))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
837 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
838 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
839 int ch;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
840 Idx off;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
841
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
842 /* Handle the common (easiest) cases first. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
843 if (BE (!pstr->mbs_allocated, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
844 return re_string_peek_byte (pstr, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
845
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
846 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
847 if (pstr->mb_cur_max > 1
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
848 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
849 return re_string_peek_byte (pstr, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
850 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
851
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
852 off = pstr->cur_idx + idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
853 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
854 if (pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
855 off = pstr->offsets[off];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
856 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
857
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
858 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
859
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
860 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
861 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
862 this function returns CAPITAL LETTER I instead of first byte of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
863 DOTLESS SMALL LETTER I. The latter would confuse the parser,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
864 since peek_byte_case doesn't advance cur_idx in any way. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
865 if (pstr->offsets_needed && !isascii (ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
866 return re_string_peek_byte (pstr, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
867 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
868
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
869 return ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
870 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
871
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
872 static unsigned char
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
873 internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
874 re_string_fetch_byte_case (re_string_t *pstr)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
875 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
876 if (BE (!pstr->mbs_allocated, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
877 return re_string_fetch_byte (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
878
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
879 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
880 if (pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
881 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
882 Idx off;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
883 int ch;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
884
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
885 /* For tr_TR.UTF-8 [[:islower:]] there is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
886 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
887 in that case the whole multi-byte character and return
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
888 the original letter. On the other side, with
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
889 [[: DOTLESS SMALL LETTER I return [[:I, as doing
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
890 anything else would complicate things too much. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
891
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
892 if (!re_string_first_byte (pstr, pstr->cur_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
893 return re_string_fetch_byte (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
894
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
895 off = pstr->offsets[pstr->cur_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
896 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
897
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
898 if (! isascii (ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
899 return re_string_fetch_byte (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
900
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
901 re_string_skip_bytes (pstr,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
902 re_string_char_size_at (pstr, pstr->cur_idx));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
903 return ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
904 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
905 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
906
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
907 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
908 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
909
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
910 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
911 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
912 re_string_destruct (re_string_t *pstr)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
913 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
914 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
915 re_free (pstr->wcs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
916 re_free (pstr->offsets);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
917 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
918 if (pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
919 re_free (pstr->mbs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
920 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
921
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
922 /* Return the context at IDX in INPUT. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
923
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
924 static unsigned int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
925 internal_function
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
926 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
927 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
928 int c;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
929 if (BE (! REG_VALID_INDEX (idx), 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
930 /* In this case, we use the value stored in input->tip_context,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
931 since we can't know the character in input->mbs[-1] here. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
932 return input->tip_context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
933 if (BE (idx == input->len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
934 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
935 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
936 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
937 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
938 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
939 wint_t wc;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
940 Idx wc_idx = idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
941 while(input->wcs[wc_idx] == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
942 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
943 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
944 /* It must not happen. */
6197
4b3066daba2f * regex_internal.c (re_string_context_at): Fix bug where the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6196
diff changeset
945 assert (REG_VALID_INDEX (wc_idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
946 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
947 --wc_idx;
6197
4b3066daba2f * regex_internal.c (re_string_context_at): Fix bug where the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6196
diff changeset
948 if (! REG_VALID_INDEX (wc_idx))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
949 return input->tip_context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
950 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
951 wc = input->wcs[wc_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
952 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
953 return CONTEXT_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
954 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
955 ? CONTEXT_NEWLINE : 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
956 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
957 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
958 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
959 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
960 c = re_string_byte_at (input, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
961 if (bitset_contain (input->word_char, c))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
962 return CONTEXT_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
963 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
964 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
965 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
966
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
967 /* Functions for set operation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
969 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
970 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
971 re_node_set_alloc (re_node_set *set, Idx size)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
972 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
973 set->alloc = size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
974 set->nelem = 0;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
975 set->elems = re_malloc (Idx, size);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
976 if (BE (set->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
977 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
978 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
979 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
980
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
981 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
982 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
983 re_node_set_init_1 (re_node_set *set, Idx elem)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
984 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
985 set->alloc = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
986 set->nelem = 1;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
987 set->elems = re_malloc (Idx, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
988 if (BE (set->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
989 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
990 set->alloc = set->nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
991 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
992 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
993 set->elems[0] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
994 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
995 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
996
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
997 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
998 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
999 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1000 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1001 set->alloc = 2;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1002 set->elems = re_malloc (Idx, 2);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1003 if (BE (set->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1004 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1005 if (elem1 == elem2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1006 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1007 set->nelem = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1008 set->elems[0] = elem1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1009 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1010 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1011 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1012 set->nelem = 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1013 if (elem1 < elem2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1014 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1015 set->elems[0] = elem1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1016 set->elems[1] = elem2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1017 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1018 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1019 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1020 set->elems[0] = elem2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1021 set->elems[1] = elem1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1022 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1023 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1024 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1025 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1026
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1027 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1028 internal_function __attribute_warn_unused_result__
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1029 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1030 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1031 dest->nelem = src->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1032 if (src->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1033 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1034 dest->alloc = dest->nelem;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1035 dest->elems = re_malloc (Idx, dest->alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1036 if (BE (dest->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1037 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1038 dest->alloc = dest->nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1039 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1040 }
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1041 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1042 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1043 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1044 re_node_set_init_empty (dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1045 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1046 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1047
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1048 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1049 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1050 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1051
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1052 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1053 internal_function __attribute_warn_unused_result__
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1054 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1055 const re_node_set *src2)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1056 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1057 Idx i1, i2, is, id, delta, sbase;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1058 if (src1->nelem == 0 || src2->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1059 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1060
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1061 /* We need dest->nelem + 2 * elems_in_intersection; this is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1062 conservative estimate. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1063 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1064 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1065 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1066 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1067 if (BE (new_elems == NULL, 0))
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1068 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1069 dest->elems = new_elems;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1070 dest->alloc = new_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1071 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1072
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1073 /* Find the items in the intersection of SRC1 and SRC2, and copy
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1074 into the top of DEST those that are not already in DEST itself. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1075 sbase = dest->nelem + src1->nelem + src2->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1076 i1 = src1->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1077 i2 = src2->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1078 id = dest->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1079 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1080 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1081 if (src1->elems[i1] == src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1082 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1083 /* Try to find the item in DEST. Maybe we could binary search? */
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1084 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1085 --id;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1086
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1087 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1088 dest->elems[--sbase] = src1->elems[i1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1089
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1090 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1091 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1092 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1093
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1094 /* Lower the highest of the two items. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1095 else if (src1->elems[i1] < src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1096 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1097 if (! REG_VALID_INDEX (--i2))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1098 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1099 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1100 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1101 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1102 if (! REG_VALID_INDEX (--i1))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1103 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1104 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1105 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1106
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1107 id = dest->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1108 is = dest->nelem + src1->nelem + src2->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1109 delta = is - sbase + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1110
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1111 /* Now copy. When DELTA becomes zero, the remaining
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1112 DEST elements are already in place; this is more or
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1113 less the same loop that is in re_node_set_merge. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1114 dest->nelem += delta;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1115 if (delta > 0 && REG_VALID_INDEX (id))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1116 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1117 {
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1118 if (dest->elems[is] > dest->elems[id])
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1119 {
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1120 /* Copy from the top. */
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1121 dest->elems[id + delta--] = dest->elems[is--];
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1122 if (delta == 0)
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1123 break;
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1124 }
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1125 else
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1126 {
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1127 /* Slide from the bottom. */
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1128 dest->elems[id + delta] = dest->elems[id];
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1129 if (! REG_VALID_INDEX (--id))
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1130 break;
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1131 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1132 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1133
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1134 /* Copy remaining SRC elements. */
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1135 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1136
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1137 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1138 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1139
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1140 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1141 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1142
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1143 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1144 internal_function __attribute_warn_unused_result__
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1145 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1146 const re_node_set *src2)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1147 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1148 Idx i1, i2, id;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1149 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1150 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1151 dest->alloc = src1->nelem + src2->nelem;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1152 dest->elems = re_malloc (Idx, dest->alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1153 if (BE (dest->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1154 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1155 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1156 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1157 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1158 if (src1 != NULL && src1->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1159 return re_node_set_init_copy (dest, src1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1160 else if (src2 != NULL && src2->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1161 return re_node_set_init_copy (dest, src2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1162 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1163 re_node_set_init_empty (dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1164 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1165 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1166 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1167 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1168 if (src1->elems[i1] > src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1169 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1170 dest->elems[id++] = src2->elems[i2++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1171 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1172 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1173 if (src1->elems[i1] == src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1174 ++i2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1175 dest->elems[id++] = src1->elems[i1++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1176 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1177 if (i1 < src1->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1178 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1179 memcpy (dest->elems + id, src1->elems + i1,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1180 (src1->nelem - i1) * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1181 id += src1->nelem - i1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1182 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1183 else if (i2 < src2->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1184 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1185 memcpy (dest->elems + id, src2->elems + i2,
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1186 (src2->nelem - i2) * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1187 id += src2->nelem - i2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1188 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1189 dest->nelem = id;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1190 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1191 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1192
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1193 /* Calculate the union set of the sets DEST and SRC. And store it to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1194 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1195
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1196 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1197 internal_function __attribute_warn_unused_result__
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1198 re_node_set_merge (re_node_set *dest, const re_node_set *src)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1199 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1200 Idx is, id, sbase, delta;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1201 if (src == NULL || src->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1202 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1203 if (dest->alloc < 2 * src->nelem + dest->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1204 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1205 Idx new_alloc = 2 * (src->nelem + dest->alloc);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1206 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1207 if (BE (new_buffer == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1208 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1209 dest->elems = new_buffer;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1210 dest->alloc = new_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1211 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1212
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1213 if (BE (dest->nelem == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1214 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1215 dest->nelem = src->nelem;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1216 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1217 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1218 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1219
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1220 /* Copy into the top of DEST the items of SRC that are not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1221 found in DEST. Maybe we could binary search in DEST? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1222 for (sbase = dest->nelem + 2 * src->nelem,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1223 is = src->nelem - 1, id = dest->nelem - 1;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1224 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1225 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1226 if (dest->elems[id] == src->elems[is])
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1227 is--, id--;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1228 else if (dest->elems[id] < src->elems[is])
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1229 dest->elems[--sbase] = src->elems[is--];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1230 else /* if (dest->elems[id] > src->elems[is]) */
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1231 --id;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1232 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1233
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1234 if (REG_VALID_INDEX (is))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1235 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1236 /* If DEST is exhausted, the remaining items of SRC must be unique. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1237 sbase -= is + 1;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1238 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1239 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1240
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1241 id = dest->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1242 is = dest->nelem + 2 * src->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1243 delta = is - sbase + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1244 if (delta == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1245 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1246
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1247 /* Now copy. When DELTA becomes zero, the remaining
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1248 DEST elements are already in place. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1249 dest->nelem += delta;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1250 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1251 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1252 if (dest->elems[is] > dest->elems[id])
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1253 {
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1254 /* Copy from the top. */
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1255 dest->elems[id + delta--] = dest->elems[is--];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1256 if (delta == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1257 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1258 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1259 else
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1260 {
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1261 /* Slide from the bottom. */
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1262 dest->elems[id + delta] = dest->elems[id];
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1263 if (! REG_VALID_INDEX (--id))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1264 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1265 /* Copy remaining SRC elements. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1266 memcpy (dest->elems, dest->elems + sbase,
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1267 delta * sizeof (Idx));
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1268 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1269 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1270 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1271 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1272
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1273 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1274 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1275
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1276 /* Insert the new element ELEM to the re_node_set* SET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1277 SET should not already have ELEM.
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1278 Return true if successful. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1279
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1280 static bool
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1281 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1282 re_node_set_insert (re_node_set *set, Idx elem)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1283 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1284 Idx idx;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1285 /* In case the set is empty. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1286 if (set->alloc == 0)
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1287 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1288
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1289 if (BE (set->nelem, 0) == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1290 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1291 /* We already guaranteed above that set->alloc != 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1292 set->elems[0] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1293 ++set->nelem;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1294 return true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1295 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1296
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1297 /* Realloc if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1298 if (set->alloc == set->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1299 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1300 Idx *new_elems;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1301 set->alloc = set->alloc * 2;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1302 new_elems = re_realloc (set->elems, Idx, set->alloc);
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1303 if (BE (new_elems == NULL, 0))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1304 return false;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1305 set->elems = new_elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1306 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1307
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1308 /* Move the elements which follows the new element. Test the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1309 first element separately to skip a check in the inner loop. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1310 if (elem < set->elems[0])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1311 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1312 idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1313 for (idx = set->nelem; idx > 0; idx--)
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1314 set->elems[idx] = set->elems[idx - 1];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1315 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1316 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1317 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1318 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
12833
f6972e309c30 regex: sync more white-space changes from libc
Jim Meyering <meyering@redhat.com>
parents: 12832
diff changeset
1319 set->elems[idx] = set->elems[idx - 1];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1320 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1321
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1322 /* Insert the new element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1323 set->elems[idx] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1324 ++set->nelem;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1325 return true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1326 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1327
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1328 /* Insert the new element ELEM to the re_node_set* SET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1329 SET should not already have any element greater than or equal to ELEM.
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1330 Return true if successful. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1331
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1332 static bool
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1333 internal_function __attribute_warn_unused_result__
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1334 re_node_set_insert_last (re_node_set *set, Idx elem)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1335 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1336 /* Realloc if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1337 if (set->alloc == set->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1338 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1339 Idx *new_elems;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1340 set->alloc = (set->alloc + 1) * 2;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1341 new_elems = re_realloc (set->elems, Idx, set->alloc);
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1342 if (BE (new_elems == NULL, 0))
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1343 return false;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1344 set->elems = new_elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1345 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1346
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1347 /* Insert the new element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1348 set->elems[set->nelem++] = elem;
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1349 return true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1350 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1351
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1352 /* Compare two node sets SET1 and SET2.
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1353 Return true if SET1 and SET2 are equivalent. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1354
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1355 static bool
6129
2fce0cb93f59 * config/srclist.text: Add glibc bug 1248.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1356 internal_function __attribute ((pure))
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1357 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1358 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1359 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1360 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1361 return false;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1362 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1363 if (set1->elems[i] != set2->elems[i])
6195
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1364 return false;
25eaa608fc4e Use bool where appropriate.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6186
diff changeset
1365 return true;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1366 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1367
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1368 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1369
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1370 static Idx
6129
2fce0cb93f59 * config/srclist.text: Add glibc bug 1248.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1371 internal_function __attribute ((pure))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1372 re_node_set_contains (const re_node_set *set, Idx elem)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1373 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1374 __re_size_t idx, right, mid;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1375 if (! REG_VALID_NONZERO_INDEX (set->nelem))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1376 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1377
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1378 /* Binary search the element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1379 idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1380 right = set->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1381 while (idx < right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1382 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1383 mid = (idx + right) / 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1384 if (set->elems[mid] < elem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1385 idx = mid + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1386 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1387 right = mid;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1388 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1389 return set->elems[idx] == elem ? idx + 1 : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1390 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1391
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1392 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1393 internal_function
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1394 re_node_set_remove_at (re_node_set *set, Idx idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1395 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1396 if (idx < 0 || idx >= set->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1397 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1398 --set->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1399 for (; idx < set->nelem; idx++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1400 set->elems[idx] = set->elems[idx + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1401 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1402
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1403
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1404 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1405 Or return REG_MISSING if an error occurred. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1406
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1407 static Idx
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1408 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1409 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1410 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1411 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1412 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1413 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1414 Idx *new_nexts, *new_indices;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1415 re_node_set *new_edests, *new_eclosures;
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1416 re_token_t *new_nodes;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1417
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
1418 /* Avoid overflows in realloc. */
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
1419 const size_t max_object_size = MAX (sizeof (re_token_t),
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
1420 MAX (sizeof (re_node_set),
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
1421 sizeof (Idx)));
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
1422 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1423 return REG_MISSING;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1424
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1425 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1426 if (BE (new_nodes == NULL, 0))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1427 return REG_MISSING;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1428 dfa->nodes = new_nodes;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1429 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1430 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1431 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1432 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1433 if (BE (new_nexts == NULL || new_indices == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1434 || new_edests == NULL || new_eclosures == NULL, 0))
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1435 return REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1436 dfa->nexts = new_nexts;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1437 dfa->org_indices = new_indices;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1438 dfa->edests = new_edests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1439 dfa->eclosures = new_eclosures;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1440 dfa->nodes_alloc = new_nodes_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1441 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1442 dfa->nodes[dfa->nodes_len] = token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1443 dfa->nodes[dfa->nodes_len].constraint = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1444 #ifdef RE_ENABLE_I18N
7307
87cb6e41fd09 Avoid a warning about an unused variable.
Jim Meyering <jim@meyering.net>
parents: 7124
diff changeset
1445 {
87cb6e41fd09 Avoid a warning about an unused variable.
Jim Meyering <jim@meyering.net>
parents: 7124
diff changeset
1446 int type = token.type;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1447 dfa->nodes[dfa->nodes_len].accept_mb =
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1448 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
7307
87cb6e41fd09 Avoid a warning about an unused variable.
Jim Meyering <jim@meyering.net>
parents: 7124
diff changeset
1449 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1450 #endif
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1451 dfa->nexts[dfa->nodes_len] = REG_MISSING;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1452 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1453 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1454 return dfa->nodes_len++;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1455 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1456
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1457 static inline re_hashval_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1458 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1459 calc_state_hash (const re_node_set *nodes, unsigned int context)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1460 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1461 re_hashval_t hash = nodes->nelem + context;
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1462 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1463 for (i = 0 ; i < nodes->nelem ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1464 hash += nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1465 return hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1466 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1467
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1468 /* Search for the state whose node_set is equivalent to NODES.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1469 Return the pointer to the state, if we found it in the DFA.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1470 Otherwise create the new one and return it. In case of an error
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1471 return NULL and set the error code in ERR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1472 Note: - We assume NULL as the invalid state, then it is possible that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1473 return value is NULL and ERR is REG_NOERROR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1474 - We never return non-NULL value in case of any errors, it is for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1475 optimization. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1476
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1477 static re_dfastate_t *
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1478 internal_function __attribute_warn_unused_result__
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1479 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1480 const re_node_set *nodes)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1481 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1482 re_hashval_t hash;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1483 re_dfastate_t *new_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1484 struct re_state_table_entry *spot;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1485 Idx i;
6073
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1486 #ifdef lint
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1487 /* Suppress bogus uninitialized-variable warnings. */
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1488 *err = REG_NOERROR;
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1489 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1490 if (BE (nodes->nelem == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1491 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1492 *err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1493 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1494 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1495 hash = calc_state_hash (nodes, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1496 spot = dfa->state_table + (hash & dfa->state_hash_mask);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1497
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1498 for (i = 0 ; i < spot->num ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1499 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1500 re_dfastate_t *state = spot->array[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1501 if (hash != state->hash)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1502 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1503 if (re_node_set_compare (&state->nodes, nodes))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1504 return state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1505 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1506
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1507 /* There are no appropriate state in the dfa, create the new one. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1508 new_state = create_ci_newstate (dfa, nodes, hash);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1509 if (BE (new_state == NULL, 0))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1510 *err = REG_ESPACE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1511
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1512 return new_state;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1513 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1514
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1515 /* Search for the state whose node_set is equivalent to NODES and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1516 whose context is equivalent to CONTEXT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1517 Return the pointer to the state, if we found it in the DFA.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1518 Otherwise create the new one and return it. In case of an error
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1519 return NULL and set the error code in ERR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1520 Note: - We assume NULL as the invalid state, then it is possible that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1521 return value is NULL and ERR is REG_NOERROR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1522 - We never return non-NULL value in case of any errors, it is for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1523 optimization. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1524
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1525 static re_dfastate_t *
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1526 internal_function __attribute_warn_unused_result__
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1527 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6073
diff changeset
1528 const re_node_set *nodes, unsigned int context)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1529 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1530 re_hashval_t hash;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1531 re_dfastate_t *new_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1532 struct re_state_table_entry *spot;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1533 Idx i;
6073
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1534 #ifdef lint
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1535 /* Suppress bogus uninitialized-variable warnings. */
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1536 *err = REG_NOERROR;
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1537 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1538 if (nodes->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1539 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1540 *err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1541 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1542 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1543 hash = calc_state_hash (nodes, context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1544 spot = dfa->state_table + (hash & dfa->state_hash_mask);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1545
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1546 for (i = 0 ; i < spot->num ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1547 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1548 re_dfastate_t *state = spot->array[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1549 if (state->hash == hash
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1550 && state->context == context
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1551 && re_node_set_compare (state->entrance_nodes, nodes))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1552 return state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1553 }
16235
18a38c9615f0 In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents: 16201
diff changeset
1554 /* There are no appropriate state in 'dfa', create the new one. */
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1555 new_state = create_cd_newstate (dfa, nodes, context, hash);
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1556 if (BE (new_state == NULL, 0))
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1557 *err = REG_ESPACE;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1558
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1559 return new_state;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1560 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1561
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1562 /* Finish initialization of the new state NEWSTATE, and using its hash value
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1563 HASH put in the appropriate bucket of DFA's state table. Return value
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1564 indicates the error code if failed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1565
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1566 static reg_errcode_t
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1567 __attribute_warn_unused_result__
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1568 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1569 re_hashval_t hash)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1570 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1571 struct re_state_table_entry *spot;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1572 reg_errcode_t err;
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1573 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1574
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1575 newstate->hash = hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1576 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1577 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1578 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1579 for (i = 0; i < newstate->nodes.nelem; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1580 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1581 Idx elem = newstate->nodes.elems[i];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1582 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
16361
ab59b5080051 regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16358
diff changeset
1583 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1584 return REG_ESPACE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1585 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1586
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1587 spot = dfa->state_table + (hash & dfa->state_hash_mask);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1588 if (BE (spot->alloc <= spot->num, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1589 {
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1590 Idx new_alloc = 2 * spot->num + 2;
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1591 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1592 new_alloc);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1593 if (BE (new_array == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1594 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1595 spot->array = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1596 spot->alloc = new_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1597 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1598 spot->array[spot->num++] = newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1599 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1600 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1601
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1602 static void
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1603 free_state (re_dfastate_t *state)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1604 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1605 re_node_set_free (&state->non_eps_nodes);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1606 re_node_set_free (&state->inveclosure);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1607 if (state->entrance_nodes != &state->nodes)
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1608 {
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1609 re_node_set_free (state->entrance_nodes);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1610 re_free (state->entrance_nodes);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1611 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1612 re_node_set_free (&state->nodes);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1613 re_free (state->word_trtable);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1614 re_free (state->trtable);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1615 re_free (state);
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1616 }
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1617
16358
a712776b11ce maint: spelling fixes
Paul Eggert <eggert@cs.ucla.edu>
parents: 16235
diff changeset
1618 /* Create the new state which is independent of contexts.
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1619 Return the new state if succeeded, otherwise return NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1621 static re_dfastate_t *
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1622 internal_function __attribute_warn_unused_result__
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
1623 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
1624 re_hashval_t hash)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1625 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1626 Idx i;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1627 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1628 re_dfastate_t *newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1629
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1630 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1631 if (BE (newstate == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1632 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1633 err = re_node_set_init_copy (&newstate->nodes, nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1634 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1635 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1636 re_free (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1637 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1638 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1639
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1640 newstate->entrance_nodes = &newstate->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1641 for (i = 0 ; i < nodes->nelem ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1642 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1643 re_token_t *node = dfa->nodes + nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1644 re_token_type_t type = node->type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1645 if (type == CHARACTER && !node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1646 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1647 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1648 newstate->accept_mb |= node->accept_mb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1649 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1650
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1651 /* If the state has the halt node, the state is a halt state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1652 if (type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1653 newstate->halt = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1654 else if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1655 newstate->has_backref = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1656 else if (type == ANCHOR || node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1657 newstate->has_constraint = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1658 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1659 err = register_state (dfa, newstate, hash);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1660 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1661 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1662 free_state (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1663 newstate = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1664 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1665 return newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1666 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1667
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1668 /* Create the new state which is depend on the context CONTEXT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1669 Return the new state if succeeded, otherwise return NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1670
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1671 static re_dfastate_t *
12832
185094053301 regex: add many uses of __attribute_warn_unused_result__
Jim Meyering <meyering@redhat.com>
parents: 12559
diff changeset
1672 internal_function __attribute_warn_unused_result__
6185
6b09f7f6ba73 * lib/regcomp.c (search_duplicated_node): Make first pointer arg
Paul Eggert <eggert@cs.ucla.edu>
parents: 6184
diff changeset
1673 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1674 unsigned int context, re_hashval_t hash)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1675 {
6184
f1728546eca4 On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents: 6129
diff changeset
1676 Idx i, nctx_nodes = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1677 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1678 re_dfastate_t *newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1679
6726
af9abbcedfbd Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents: 6236
diff changeset
1680 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1681 if (BE (newstate == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1682 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1683 err = re_node_set_init_copy (&newstate->nodes, nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1684 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1685 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1686 re_free (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1687 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1688 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1689
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1690 newstate->context = context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1691 newstate->entrance_nodes = &newstate->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1692
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1693 for (i = 0 ; i < nodes->nelem ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1694 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1695 re_token_t *node = dfa->nodes + nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1696 re_token_type_t type = node->type;
10075
cc7bfc9f7fc1 optimize double anchors such as ^$
Paolo Bonzini <bonzini@gnu.org>
parents: 7976
diff changeset
1697 unsigned int constraint = node->constraint;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1698
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1699 if (type == CHARACTER && !constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1700 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1701 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1702 newstate->accept_mb |= node->accept_mb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1703 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1704
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1705 /* If the state has the halt node, the state is a halt state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1706 if (type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1707 newstate->halt = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1708 else if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1709 newstate->has_backref = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1710
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1711 if (constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1712 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1713 if (newstate->entrance_nodes == &newstate->nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1714 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1715 newstate->entrance_nodes = re_malloc (re_node_set, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1716 if (BE (newstate->entrance_nodes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1717 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1718 free_state (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1719 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1720 }
12834
baafee3ff85b regex: do not ignore memory allocation failure
Jim Meyering <meyering@redhat.com>
parents: 12833
diff changeset
1721 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
baafee3ff85b regex: do not ignore memory allocation failure
Jim Meyering <meyering@redhat.com>
parents: 12833
diff changeset
1722 != REG_NOERROR)
baafee3ff85b regex: do not ignore memory allocation failure
Jim Meyering <meyering@redhat.com>
parents: 12833
diff changeset
1723 return NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1724 nctx_nodes = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1725 newstate->has_constraint = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1726 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1727
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1728 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1729 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1730 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1731 ++nctx_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1732 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1733 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1734 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1735 err = register_state (dfa, newstate, hash);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1736 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1737 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1738 free_state (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1739 newstate = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1740 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1741 return newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1742 }