annotate lib/regex_internal.c @ 6073:331a20f7531d

(re_acquire_state, re_acquire_state_context) [defined lint]: Suppress bogus uninitialized-variable warnings.
author Paul Eggert <eggert@cs.ucla.edu>
date Sat, 20 Aug 2005 00:58:13 +0000
parents 82e7d3903d95
children e2dd51f6e259
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1 /* Extended regular expression matching and search library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3 This file is part of the GNU C Library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
5
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
6 This program is free software; you can redistribute it and/or modify
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
8 the Free Software Foundation; either version 2, or (at your option)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
9 any later version.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
10
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
11 This program is distributed in the hope that it will be useful,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
14 GNU General Public License for more details.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
15
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
16 You should have received a copy of the GNU General Public License along
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
17 with this program; if not, write to the Free Software Foundation,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
19
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
20 static void re_string_construct_common (const char *str, int len,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
21 re_string_t *pstr,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
22 RE_TRANSLATE_TYPE trans, int icase,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
23 const re_dfa_t *dfa) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
24 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
25 static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
26 wint_t *last_wc) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
27 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
28 static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
29 unsigned int hash) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
30 static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
31 const re_node_set *nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
32 unsigned int hash) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
33 static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
34 const re_node_set *nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
35 unsigned int context,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
36 unsigned int hash) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
37 static unsigned int inline calc_state_hash (const re_node_set *nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
38 unsigned int context) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
39
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
40 /* Functions for string operation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
41
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
42 /* This function allocate the buffers. It is necessary to call
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
43 re_string_reconstruct before using the object. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
44
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
45 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
46 re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
47 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
48 const char *str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
49 int len, init_len, icase;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
50 RE_TRANSLATE_TYPE trans;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
51 const re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
52 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
53 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
54 int init_buf_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
55
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
56 /* Ensure at least one character fits into the buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
57 if (init_len < dfa->mb_cur_max)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
58 init_len = dfa->mb_cur_max;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
59 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
60 re_string_construct_common (str, len, pstr, trans, icase, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
61
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
62 ret = re_string_realloc_buffers (pstr, init_buf_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
63 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
64 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
65
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
66 pstr->word_char = dfa->word_char;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
67 pstr->word_ops_used = dfa->word_ops_used;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
68 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
69 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
70 pstr->valid_raw_len = pstr->valid_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
71 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
72 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
73
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
74 /* This function allocate the buffers, and initialize them. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
75
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
76 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
77 re_string_construct (pstr, str, len, trans, icase, dfa)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
78 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
79 const char *str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
80 int len, icase;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
81 RE_TRANSLATE_TYPE trans;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
82 const re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
83 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
84 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
85 memset (pstr, '\0', sizeof (re_string_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
86 re_string_construct_common (str, len, pstr, trans, icase, dfa);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
87
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
88 if (len > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
89 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
90 ret = re_string_realloc_buffers (pstr, len + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
91 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
92 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
93 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
94 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
95
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
96 if (icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
97 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
98 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
99 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
100 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
101 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
102 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
103 ret = build_wcs_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
104 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
105 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
106 if (pstr->valid_raw_len >= len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
107 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
108 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
109 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
110 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
111 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
112 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
113 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
114 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
115 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
116 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
117 build_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
118 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
119 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
120 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
121 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
122 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
123 build_wcs_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
124 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
125 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
126 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
127 if (trans != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
128 re_string_translate_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
129 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
130 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
131 pstr->valid_len = pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
132 pstr->valid_raw_len = pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
133 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
134 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
135 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
136
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
137 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
138 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
139
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
140 /* Helper functions for re_string_allocate, and re_string_construct. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
141
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
142 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
143 re_string_realloc_buffers (pstr, new_buf_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
144 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
145 int new_buf_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
146 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
147 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
148 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
149 {
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
150 wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
151 if (BE (new_wcs == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
152 return REG_ESPACE;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
153 pstr->wcs = new_wcs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
154 if (pstr->offsets != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
155 {
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
156 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
157 if (BE (new_offsets == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
158 return REG_ESPACE;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
159 pstr->offsets = new_offsets;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
160 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
161 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
162 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
163 if (pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
164 {
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
165 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
166 new_buf_len);
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
167 if (BE (new_mbs == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
168 return REG_ESPACE;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
169 pstr->mbs = new_mbs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
170 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
171 pstr->bufs_len = new_buf_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
172 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
173 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
174
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
175
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
176 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
177 re_string_construct_common (str, len, pstr, trans, icase, dfa)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
178 const char *str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
179 int len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
180 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
181 RE_TRANSLATE_TYPE trans;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
182 int icase;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
183 const re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
184 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
185 pstr->raw_mbs = (const unsigned char *) str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
186 pstr->len = len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
187 pstr->raw_len = len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
188 pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
189 pstr->icase = icase ? 1 : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
190 pstr->mbs_allocated = (trans != NULL || icase);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
191 pstr->mb_cur_max = dfa->mb_cur_max;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
192 pstr->is_utf8 = dfa->is_utf8;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
193 pstr->map_notascii = dfa->map_notascii;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
194 pstr->stop = pstr->len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
195 pstr->raw_stop = pstr->stop;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
196 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
197
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
198 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
199
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
200 /* Build wide character buffer PSTR->WCS.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
201 If the byte sequence of the string is:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
202 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
203 Then wide character buffer will be:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
204 <wc1> , WEOF , <wc2> , WEOF , <wc3>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
205 We use WEOF for padding; it indicates that the position isn't
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
206 a first byte of a multibyte character.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
207
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
208 Note that this function assumes PSTR->VALID_LEN elements are already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
209 built and starts from PSTR->VALID_LEN. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
210
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
211 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
212 build_wcs_buffer (pstr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
213 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
214 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
215 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
216 unsigned char buf[MB_LEN_MAX];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
217 assert (MB_LEN_MAX >= pstr->mb_cur_max);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
218 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
219 unsigned char buf[64];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
220 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
221 mbstate_t prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
222 int byte_idx, end_idx, remain_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
223 size_t mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
224
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
225 /* Build the buffers from pstr->valid_len to either pstr->len or
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
226 pstr->bufs_len. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
227 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
228 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
229 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
230 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
231 const char *p;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
232
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
233 remain_len = end_idx - byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
234 prev_st = pstr->cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
235 /* Apply the translation if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
236 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
237 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
238 int i, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
239
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
240 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
241 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
242 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
243 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
244 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
245 p = (const char *) buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
246 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
247 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
248 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
249 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
250 if (BE (mbclen == (size_t) -2, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
251 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
252 /* The buffer doesn't have enough space, finish to build. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
253 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
254 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
255 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
256 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
257 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
258 /* We treat these cases as a singlebyte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
259 mbclen = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
260 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
261 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
262 wc = pstr->trans[wc];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
263 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
264 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
265
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
266 /* Write wide character and padding. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
267 pstr->wcs[byte_idx++] = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
268 /* Write paddings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
269 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
270 pstr->wcs[byte_idx++] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
271 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
272 pstr->valid_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
273 pstr->valid_raw_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
274 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
275
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
276 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
277 but for REG_ICASE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
278
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
279 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
280 build_wcs_upper_buffer (pstr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
281 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
282 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
283 mbstate_t prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
284 int src_idx, byte_idx, end_idx, remain_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
285 size_t mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
286 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
287 char buf[MB_LEN_MAX];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
288 assert (MB_LEN_MAX >= pstr->mb_cur_max);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
289 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
290 char buf[64];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
291 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
292
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
293 byte_idx = pstr->valid_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
294 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
295
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
296 /* The following optimization assumes that ASCII characters can be
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
297 mapped to wide characters with a simple cast. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
298 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
299 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
300 while (byte_idx < end_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
301 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
302 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
303
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
304 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
305 && mbsinit (&pstr->cur_state))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
306 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
307 /* In case of a singlebyte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
308 pstr->mbs[byte_idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
309 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
310 /* The next step uses the assumption that wchar_t is encoded
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
311 ASCII-safe: all ASCII values can be converted like this. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
312 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
313 ++byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
314 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
315 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
316
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
317 remain_len = end_idx - byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
318 prev_st = pstr->cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
319 mbclen = mbrtowc (&wc,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
320 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
321 + byte_idx), remain_len, &pstr->cur_state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
322 if (BE (mbclen + 2 > 2, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
323 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
324 wchar_t wcu = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
325 if (iswlower (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
326 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
327 size_t mbcdlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
328
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
329 wcu = towupper (wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
330 mbcdlen = wcrtomb (buf, wcu, &prev_st);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
331 if (BE (mbclen == mbcdlen, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
332 memcpy (pstr->mbs + byte_idx, buf, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
333 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
334 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
335 src_idx = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
336 goto offsets_needed;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
337 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
338 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
339 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
340 memcpy (pstr->mbs + byte_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
341 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
342 pstr->wcs[byte_idx++] = wcu;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
343 /* Write paddings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
344 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
345 pstr->wcs[byte_idx++] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
346 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
347 else if (mbclen == (size_t) -1 || mbclen == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
348 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
349 /* It is an invalid character or '\0'. Just use the byte. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
350 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
351 pstr->mbs[byte_idx] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
352 /* And also cast it to wide char. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
353 pstr->wcs[byte_idx++] = (wchar_t) ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
354 if (BE (mbclen == (size_t) -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
355 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
356 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
357 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
358 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
359 /* The buffer doesn't have enough space, finish to build. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
360 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
361 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
362 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
363 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
364 pstr->valid_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
365 pstr->valid_raw_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
366 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
367 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
368 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
369 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
370 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
371 wchar_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
372 const char *p;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
373 offsets_needed:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
374 remain_len = end_idx - byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
375 prev_st = pstr->cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
376 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
377 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
378 int i, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
379
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
380 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
381 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
382 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
383 buf[i] = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
384 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
385 p = (const char *) buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
386 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
387 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
388 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
389 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
390 if (BE (mbclen + 2 > 2, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
391 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
392 wchar_t wcu = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
393 if (iswlower (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
394 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
395 size_t mbcdlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
396
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
397 wcu = towupper (wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
398 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
399 if (BE (mbclen == mbcdlen, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
400 memcpy (pstr->mbs + byte_idx, buf, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
401 else if (mbcdlen != (size_t) -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
402 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
403 size_t i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
404
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
405 if (byte_idx + mbcdlen > pstr->bufs_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
406 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
407 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
408 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
409 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
410
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
411 if (pstr->offsets == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
412 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
413 pstr->offsets = re_malloc (int, pstr->bufs_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
414
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
415 if (pstr->offsets == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
416 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
417 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
418 if (!pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
419 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
420 for (i = 0; i < (size_t) byte_idx; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
421 pstr->offsets[i] = i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
422 pstr->offsets_needed = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
423 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
424
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
425 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
426 pstr->wcs[byte_idx] = wcu;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
427 pstr->offsets[byte_idx] = src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
428 for (i = 1; i < mbcdlen; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
429 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
430 pstr->offsets[byte_idx + i]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
431 = src_idx + (i < mbclen ? i : mbclen - 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
432 pstr->wcs[byte_idx + i] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
433 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
434 pstr->len += mbcdlen - mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
435 if (pstr->raw_stop > src_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
436 pstr->stop += mbcdlen - mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
437 end_idx = (pstr->bufs_len > pstr->len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
438 ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
439 byte_idx += mbcdlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
440 src_idx += mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
441 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
442 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
443 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
444 memcpy (pstr->mbs + byte_idx, p, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
445 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
446 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
447 memcpy (pstr->mbs + byte_idx, p, mbclen);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
448
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
449 if (BE (pstr->offsets_needed != 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
450 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
451 size_t i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
452 for (i = 0; i < mbclen; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
453 pstr->offsets[byte_idx + i] = src_idx + i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
454 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
455 src_idx += mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
456
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
457 pstr->wcs[byte_idx++] = wcu;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
458 /* Write paddings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
459 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
460 pstr->wcs[byte_idx++] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
461 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
462 else if (mbclen == (size_t) -1 || mbclen == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
463 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
464 /* It is an invalid character or '\0'. Just use the byte. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
465 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
466
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
467 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
468 ch = pstr->trans [ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
469 pstr->mbs[byte_idx] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
470
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
471 if (BE (pstr->offsets_needed != 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
472 pstr->offsets[byte_idx] = src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
473 ++src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
474
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
475 /* And also cast it to wide char. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
476 pstr->wcs[byte_idx++] = (wchar_t) ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
477 if (BE (mbclen == (size_t) -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
478 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
479 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
480 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
481 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
482 /* The buffer doesn't have enough space, finish to build. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
483 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
484 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
485 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
486 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
487 pstr->valid_len = byte_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
488 pstr->valid_raw_len = src_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
489 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
490 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
491
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
492 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
493 Return the index. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
494
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
495 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
496 re_string_skip_chars (pstr, new_raw_idx, last_wc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
497 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
498 int new_raw_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
499 wint_t *last_wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
500 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
501 mbstate_t prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
502 int rawbuf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
503 size_t mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
504 wchar_t wc = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
505
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
506 /* Skip the characters which are not necessary to check. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
507 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
508 rawbuf_idx < new_raw_idx;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
509 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
510 int remain_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
511 remain_len = pstr->len - rawbuf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
512 prev_st = pstr->cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
513 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
514 remain_len, &pstr->cur_state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
515 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
516 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
517 /* We treat these cases as a singlebyte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
518 mbclen = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
519 pstr->cur_state = prev_st;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
520 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
521 /* Then proceed the next character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
522 rawbuf_idx += mbclen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
523 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
524 *last_wc = (wint_t) wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
525 return rawbuf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
526 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
527 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
528
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
529 /* Build the buffer PSTR->MBS, and apply the translation if we need.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
530 This function is used in case of REG_ICASE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
531
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
532 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
533 build_upper_buffer (pstr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
534 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
535 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
536 int char_idx, end_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
537 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
538
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
539 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
540 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
541 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
542 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
543 ch = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
544 if (islower (ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
545 pstr->mbs[char_idx] = toupper (ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
546 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
547 pstr->mbs[char_idx] = ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
548 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
549 pstr->valid_len = char_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
550 pstr->valid_raw_len = char_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
551 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
552
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
553 /* Apply TRANS to the buffer in PSTR. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
554
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
555 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
556 re_string_translate_buffer (pstr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
557 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
558 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
559 int buf_idx, end_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
560 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
561
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
562 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
563 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
564 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
565 pstr->mbs[buf_idx] = pstr->trans[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
566 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
567
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
568 pstr->valid_len = buf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
569 pstr->valid_raw_len = buf_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
570 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
571
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
572 /* This function reconstructs the buffers.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
573 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
574 convert to upper case in case of REG_ICASE, apply translation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
575
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
576 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
577 re_string_reconstruct (pstr, idx, eflags)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
578 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
579 int idx, eflags;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
580 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
581 int offset = idx - pstr->raw_mbs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
582 if (BE (offset < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
583 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
584 /* Reset buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
585 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
586 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
587 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
588 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
589 pstr->len = pstr->raw_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
590 pstr->stop = pstr->raw_stop;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
591 pstr->valid_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
592 pstr->raw_mbs_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
593 pstr->valid_raw_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
594 pstr->offsets_needed = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
595 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
596 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
597 if (!pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
598 pstr->mbs = (unsigned char *) pstr->raw_mbs;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
599 offset = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
600 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
601
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
602 if (BE (offset != 0, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
603 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
604 /* Are the characters which are already checked remain? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
605 if (BE (offset < pstr->valid_raw_len, 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
606 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
607 /* Handling this would enlarge the code too much.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
608 Accept a slowdown in that case. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
609 && pstr->offsets_needed == 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
610 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
611 )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
612 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
613 /* Yes, move them to the front of the buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
614 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
615 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
616 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
617 memmove (pstr->wcs, pstr->wcs + offset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
618 (pstr->valid_len - offset) * sizeof (wint_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
619 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
620 if (BE (pstr->mbs_allocated, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
621 memmove (pstr->mbs, pstr->mbs + offset,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
622 pstr->valid_len - offset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
623 pstr->valid_len -= offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
624 pstr->valid_raw_len -= offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
625 #if DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
626 assert (pstr->valid_len > 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
627 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
628 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
629 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
630 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
631 /* No, skip all characters until IDX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
632 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
633 if (BE (pstr->offsets_needed, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
634 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
635 pstr->len = pstr->raw_len - idx + offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
636 pstr->stop = pstr->raw_stop - idx + offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
637 pstr->offsets_needed = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
638 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
639 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
640 pstr->valid_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
641 pstr->valid_raw_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
642 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
643 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
644 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
645 int wcs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
646 wint_t wc = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
647
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
648 if (pstr->is_utf8)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
649 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
650 const unsigned char *raw, *p, *q, *end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
651
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
652 /* Special case UTF-8. Multi-byte chars start with any
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
653 byte other than 0x80 - 0xbf. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
654 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
655 end = raw + (offset - pstr->mb_cur_max);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
656 for (p = raw + offset - 1; p >= end; --p)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
657 if ((*p & 0xc0) != 0x80)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
658 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
659 mbstate_t cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
660 wchar_t wc2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
661 int mlen = raw + pstr->len - p;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
662 unsigned char buf[6];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
663
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
664 q = p;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
665 if (BE (pstr->trans != NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
666 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
667 int i = mlen < 6 ? mlen : 6;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
668 while (--i >= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
669 buf[i] = pstr->trans[p[i]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
670 q = buf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
671 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
672 /* XXX Don't use mbrtowc, we know which conversion
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
673 to use (UTF-8 -> UCS4). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
674 memset (&cur_state, 0, sizeof (cur_state));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
675 mlen = (mbrtowc (&wc2, (const char *) p, mlen,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
676 &cur_state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
677 - (raw + offset - p));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
678 if (mlen >= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
679 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
680 memset (&pstr->cur_state, '\0',
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
681 sizeof (mbstate_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
682 pstr->valid_len = mlen;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
683 wc = wc2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
684 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
685 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
686 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
687 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
688
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
689 if (wc == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
690 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
691 if (BE (pstr->valid_len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
692 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
693 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
694 pstr->wcs[wcs_idx] = WEOF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
695 if (pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
696 memset (pstr->mbs, 255, pstr->valid_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
697 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
698 pstr->valid_raw_len = pstr->valid_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
699 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
700 && IS_WIDE_WORD_CHAR (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
701 ? CONTEXT_WORD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
702 : ((IS_WIDE_NEWLINE (wc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
703 && pstr->newline_anchor)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
704 ? CONTEXT_NEWLINE : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
705 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
706 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
707 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
708 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
709 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
710 if (pstr->trans)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
711 c = pstr->trans[c];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
712 pstr->tip_context = (bitset_contain (pstr->word_char, c)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
713 ? CONTEXT_WORD
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
714 : ((IS_NEWLINE (c) && pstr->newline_anchor)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
715 ? CONTEXT_NEWLINE : 0));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
716 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
717 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
718 if (!BE (pstr->mbs_allocated, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
719 pstr->mbs += offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
720 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
721 pstr->raw_mbs_idx = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
722 pstr->len -= offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
723 pstr->stop -= offset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
724
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
725 /* Then build the buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
726 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
727 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
728 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
729 if (pstr->icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
730 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
731 int ret = build_wcs_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
732 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
733 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
734 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
735 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
736 build_wcs_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
737 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
738 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
739 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
740 if (BE (pstr->mbs_allocated, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
741 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
742 if (pstr->icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
743 build_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
744 else if (pstr->trans != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
745 re_string_translate_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
746 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
747 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
748 pstr->valid_len = pstr->len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
749
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
750 pstr->cur_idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
751 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
752 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
753
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
754 static unsigned char
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
755 re_string_peek_byte_case (pstr, idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
756 const re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
757 int idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
758 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
759 int ch, off;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
760
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
761 /* Handle the common (easiest) cases first. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
762 if (BE (!pstr->mbs_allocated, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
763 return re_string_peek_byte (pstr, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
764
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
765 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
766 if (pstr->mb_cur_max > 1
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
767 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
768 return re_string_peek_byte (pstr, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
769 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
770
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
771 off = pstr->cur_idx + idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
772 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
773 if (pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
774 off = pstr->offsets[off];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
775 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
776
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
777 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
778
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
779 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
780 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
781 this function returns CAPITAL LETTER I instead of first byte of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
782 DOTLESS SMALL LETTER I. The latter would confuse the parser,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
783 since peek_byte_case doesn't advance cur_idx in any way. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
784 if (pstr->offsets_needed && !isascii (ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
785 return re_string_peek_byte (pstr, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
786 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
787
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
788 return ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
789 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
790
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
791 static unsigned char
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
792 re_string_fetch_byte_case (pstr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
793 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
794 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
795 if (BE (!pstr->mbs_allocated, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
796 return re_string_fetch_byte (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
797
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
798 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
799 if (pstr->offsets_needed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
800 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
801 int off, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
802
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
803 /* For tr_TR.UTF-8 [[:islower:]] there is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
804 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
805 in that case the whole multi-byte character and return
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
806 the original letter. On the other side, with
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
807 [[: DOTLESS SMALL LETTER I return [[:I, as doing
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
808 anything else would complicate things too much. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
809
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
810 if (!re_string_first_byte (pstr, pstr->cur_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
811 return re_string_fetch_byte (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
812
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
813 off = pstr->offsets[pstr->cur_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
814 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
815
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
816 if (! isascii (ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
817 return re_string_fetch_byte (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
818
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
819 re_string_skip_bytes (pstr,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
820 re_string_char_size_at (pstr, pstr->cur_idx));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
821 return ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
822 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
823 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
824
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
825 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
826 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
827
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
828 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
829 re_string_destruct (pstr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
830 re_string_t *pstr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
831 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
832 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
833 re_free (pstr->wcs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
834 re_free (pstr->offsets);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
835 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
836 if (pstr->mbs_allocated)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
837 re_free (pstr->mbs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
838 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
839
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
840 /* Return the context at IDX in INPUT. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
841
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
842 static unsigned int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
843 re_string_context_at (input, idx, eflags)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
844 const re_string_t *input;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
845 int idx, eflags;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
846 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
847 int c;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
848 if (BE (idx < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
849 /* In this case, we use the value stored in input->tip_context,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
850 since we can't know the character in input->mbs[-1] here. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
851 return input->tip_context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
852 if (BE (idx == input->len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
853 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
854 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
855 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
856 if (input->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
857 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
858 wint_t wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
859 int wc_idx = idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
860 while(input->wcs[wc_idx] == WEOF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
861 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
862 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
863 /* It must not happen. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
864 assert (wc_idx >= 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
865 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
866 --wc_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
867 if (wc_idx < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
868 return input->tip_context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
869 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
870 wc = input->wcs[wc_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
871 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
872 return CONTEXT_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
873 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
874 ? CONTEXT_NEWLINE : 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
875 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
876 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
877 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
878 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
879 c = re_string_byte_at (input, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
880 if (bitset_contain (input->word_char, c))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
881 return CONTEXT_WORD;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
882 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
883 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
884 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
885
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
886 /* Functions for set operations.  */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
887
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
888 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
889 re_node_set_alloc (set, size)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
890 re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
891 int size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
892 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
893 set->alloc = size;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
894 set->nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
895 set->elems = re_malloc (int, size);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
896 if (BE (set->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
897 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
898 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
899 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
900
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
901 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
902 re_node_set_init_1 (set, elem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
903 re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
904 int elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
905 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
906 set->alloc = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
907 set->nelem = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
908 set->elems = re_malloc (int, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
909 if (BE (set->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
910 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
911 set->alloc = set->nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
912 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
913 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
914 set->elems[0] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
915 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
916 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
917
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
918 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
919 re_node_set_init_2 (set, elem1, elem2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
920 re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
921 int elem1, elem2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
922 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
923 set->alloc = 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
924 set->elems = re_malloc (int, 2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
925 if (BE (set->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
926 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
927 if (elem1 == elem2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
928 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
929 set->nelem = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
930 set->elems[0] = elem1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
931 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
932 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
933 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
934 set->nelem = 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
935 if (elem1 < elem2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
936 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
937 set->elems[0] = elem1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
938 set->elems[1] = elem2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
939 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
940 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
941 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
942 set->elems[0] = elem2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
943 set->elems[1] = elem1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
944 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
945 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
946 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
947 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
948
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
949 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
950 re_node_set_init_copy (dest, src)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
951 re_node_set *dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
952 const re_node_set *src;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
953 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
954 dest->nelem = src->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
955 if (src->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
956 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
957 dest->alloc = dest->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
958 dest->elems = re_malloc (int, dest->alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
959 if (BE (dest->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
960 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
961 dest->alloc = dest->nelem = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
962 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
963 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
964 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
965 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
966 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
967 re_node_set_init_empty (dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
968 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
969 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
970
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
971 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
972 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
973 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
974
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
975 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
976 re_node_set_add_intersect (dest, src1, src2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
977 re_node_set *dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
978 const re_node_set *src1, *src2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
979 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
980 int i1, i2, is, id, delta, sbase;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
981 if (src1->nelem == 0 || src2->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
982 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
983
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
984 /* We need dest->nelem + 2 * elems_in_intersection; this is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
985 conservative estimate. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
986 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
987 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
988 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
989 int *new_elems = re_realloc (dest->elems, int, new_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
990 if (BE (new_elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
991 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
992 dest->elems = new_elems;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
993 dest->alloc = new_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
994 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
995
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
996 /* Find the items in the intersection of SRC1 and SRC2, and copy
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
997 into the top of DEST those that are not already in DEST itself. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
998 sbase = dest->nelem + src1->nelem + src2->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
999 i1 = src1->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1000 i2 = src2->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1001 id = dest->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1002 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1003 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1004 if (src1->elems[i1] == src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1005 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1006 /* Try to find the item in DEST. Maybe we could binary search? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1007 while (id >= 0 && dest->elems[id] > src1->elems[i1])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1008 --id;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1009
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1010 if (id < 0 || dest->elems[id] != src1->elems[i1])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1011 dest->elems[--sbase] = src1->elems[i1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1012
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1013 if (--i1 < 0 || --i2 < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1014 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1015 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1016
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1017 /* Lower the highest of the two items. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1018 else if (src1->elems[i1] < src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1019 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1020 if (--i2 < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1021 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1022 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1023 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1024 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1025 if (--i1 < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1026 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1027 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1028 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1029
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1030 id = dest->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1031 is = dest->nelem + src1->nelem + src2->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1032 delta = is - sbase + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1033
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1034 /* Now copy. When DELTA becomes zero, the remaining
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1035 DEST elements are already in place; this is more or
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1036 less the same loop that is in re_node_set_merge. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1037 dest->nelem += delta;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1038 if (delta > 0 && id >= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1039 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1040 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1041 if (dest->elems[is] > dest->elems[id])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1042 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1043 /* Copy from the top. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1044 dest->elems[id + delta--] = dest->elems[is--];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1045 if (delta == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1046 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1047 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1048 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1049 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1050 /* Slide from the bottom. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1051 dest->elems[id + delta] = dest->elems[id];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1052 if (--id < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1053 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1054 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1055 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1056
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1057 /* Copy remaining SRC elements. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1058 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1059
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1060 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1061 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1062
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1063 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1064 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1065
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1066 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1067 re_node_set_init_union (dest, src1, src2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1068 re_node_set *dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1069 const re_node_set *src1, *src2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1070 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1071 int i1, i2, id;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1072 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1073 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1074 dest->alloc = src1->nelem + src2->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1075 dest->elems = re_malloc (int, dest->alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1076 if (BE (dest->elems == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1077 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1078 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1079 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1080 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1081 if (src1 != NULL && src1->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1082 return re_node_set_init_copy (dest, src1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1083 else if (src2 != NULL && src2->nelem > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1084 return re_node_set_init_copy (dest, src2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1085 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1086 re_node_set_init_empty (dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1087 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1088 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1089 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1090 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1091 if (src1->elems[i1] > src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1092 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1093 dest->elems[id++] = src2->elems[i2++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1094 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1095 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1096 if (src1->elems[i1] == src2->elems[i2])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1097 ++i2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1098 dest->elems[id++] = src1->elems[i1++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1099 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1100 if (i1 < src1->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1101 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1102 memcpy (dest->elems + id, src1->elems + i1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1103 (src1->nelem - i1) * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1104 id += src1->nelem - i1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1105 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1106 else if (i2 < src2->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1107 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1108 memcpy (dest->elems + id, src2->elems + i2,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1109 (src2->nelem - i2) * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1110 id += src2->nelem - i2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1111 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1112 dest->nelem = id;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1113 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1114 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1115
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1116 /* Calculate the union set of the sets DEST and SRC. And store it to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1117 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1118
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1119 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1120 re_node_set_merge (dest, src)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1121 re_node_set *dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1122 const re_node_set *src;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1123 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1124 int is, id, sbase, delta;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1125 if (src == NULL || src->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1126 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1127 if (dest->alloc < 2 * src->nelem + dest->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1128 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1129 int new_alloc = 2 * (src->nelem + dest->alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1130 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1131 if (BE (new_buffer == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1132 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1133 dest->elems = new_buffer;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1134 dest->alloc = new_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1135 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1136
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1137 if (BE (dest->nelem == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1138 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1139 dest->nelem = src->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1140 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1141 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1142 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1143
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1144 /* Copy into the top of DEST the items of SRC that are not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1145 found in DEST. Maybe we could binary search in DEST? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1146 for (sbase = dest->nelem + 2 * src->nelem,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1147 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1148 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1149 if (dest->elems[id] == src->elems[is])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1150 is--, id--;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1151 else if (dest->elems[id] < src->elems[is])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1152 dest->elems[--sbase] = src->elems[is--];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1153 else /* if (dest->elems[id] > src->elems[is]) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1154 --id;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1155 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1156
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1157 if (is >= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1158 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1159 /* If DEST is exhausted, the remaining items of SRC must be unique. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1160 sbase -= is + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1161 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1162 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1163
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1164 id = dest->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1165 is = dest->nelem + 2 * src->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1166 delta = is - sbase + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1167 if (delta == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1168 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1169
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1170 /* Now copy. When DELTA becomes zero, the remaining
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1171 DEST elements are already in place. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1172 dest->nelem += delta;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1173 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1174 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1175 if (dest->elems[is] > dest->elems[id])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1176 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1177 /* Copy from the top. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1178 dest->elems[id + delta--] = dest->elems[is--];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1179 if (delta == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1180 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1181 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1182 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1183 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1184 /* Slide from the bottom. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1185 dest->elems[id + delta] = dest->elems[id];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1186 if (--id < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1187 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1188 /* Copy remaining SRC elements. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1189 memcpy (dest->elems, dest->elems + sbase,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1190 delta * sizeof (int));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1191 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1192 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1193 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1194 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1195
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1196 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1197 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1198
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1199 /* Insert the new element ELEM to the re_node_set* SET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1200 SET should not already have ELEM.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1201 return -1 if an error is occured, return 1 otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1202
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1203 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1204 re_node_set_insert (set, elem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1205 re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1206 int elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1207 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1208 int idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1209 /* In case the set is empty. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1210 if (set->alloc == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1211 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1212 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1213 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1214 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1215 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1216 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1217
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1218 if (BE (set->nelem, 0) == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1219 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1220 /* We already guaranteed above that set->alloc != 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1221 set->elems[0] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1222 ++set->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1223 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1224 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1225
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1226 /* Realloc if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1227 if (set->alloc == set->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1228 {
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1229 int *new_elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1230 set->alloc = set->alloc * 2;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1231 new_elems = re_realloc (set->elems, int, set->alloc);
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1232 if (BE (new_elems == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1233 return -1;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1234 set->elems = new_elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1235 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1236
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1237 /* Move the elements which follows the new element. Test the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1238 first element separately to skip a check in the inner loop. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1239 if (elem < set->elems[0])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1240 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1241 idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1242 for (idx = set->nelem; idx > 0; idx--)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1243 set->elems[idx] = set->elems[idx - 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1244 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1245 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1246 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1247 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1248 set->elems[idx] = set->elems[idx - 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1249 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1250
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1251 /* Insert the new element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1252 set->elems[idx] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1253 ++set->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1254 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1255 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1256
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1257 /* Insert the new element ELEM to the re_node_set* SET.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1258 SET should not already have any element greater than or equal to ELEM.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1259 Return -1 if an error is occured, return 1 otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1260
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1261 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1262 re_node_set_insert_last (set, elem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1263 re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1264 int elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1265 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1266 /* Realloc if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1267 if (set->alloc == set->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1268 {
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1269 int *new_elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1270 set->alloc = (set->alloc + 1) * 2;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1271 new_elems = re_realloc (set->elems, int, set->alloc);
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1272 if (BE (new_elems == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1273 return -1;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1274 set->elems = new_elems;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1275 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1276
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1277 /* Insert the new element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1278 set->elems[set->nelem++] = elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1279 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1280 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1281
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1282 /* Compare two node sets SET1 and SET2.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1283 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1284
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1285 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1286 re_node_set_compare (set1, set2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1287 const re_node_set *set1, *set2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1288 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1289 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1290 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1291 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1292 for (i = set1->nelem ; --i >= 0 ; )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1293 if (set1->elems[i] != set2->elems[i])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1294 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1295 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1296 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1297
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1298 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1299
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1300 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1301 re_node_set_contains (set, elem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1302 const re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1303 int elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1304 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1305 unsigned int idx, right, mid;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1306 if (set->nelem <= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1307 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1308
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1309 /* Binary search the element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1310 idx = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1311 right = set->nelem - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1312 while (idx < right)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1313 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1314 mid = (idx + right) / 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1315 if (set->elems[mid] < elem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1316 idx = mid + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1317 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1318 right = mid;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1319 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1320 return set->elems[idx] == elem ? idx + 1 : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1321 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1322
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1323 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1324 re_node_set_remove_at (set, idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1325 re_node_set *set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1326 int idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1327 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1328 if (idx < 0 || idx >= set->nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1329 return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1330 --set->nelem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1331 for (; idx < set->nelem; idx++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1332 set->elems[idx] = set->elems[idx + 1];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1333 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1334
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1335
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1336 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1337 Or return -1, if an error will be occured. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1338
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1339 static int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1340 re_dfa_add_node (dfa, token)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1341 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1342 re_token_t token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1343 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1344 int type = token.type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1345 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1346 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1347 int new_nodes_alloc = dfa->nodes_alloc * 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1348 int *new_nexts, *new_indices;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1349 re_node_set *new_edests, *new_eclosures;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1350
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1351 re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1352 new_nodes_alloc);
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1353 if (BE (new_nodes == NULL, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1354 return -1;
6067
82e7d3903d95 (re_string_realloc_buffers, re_node_set_insert):
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1355 dfa->nodes = new_nodes;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1356 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1357 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1358 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1359 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1360 if (BE (new_nexts == NULL || new_indices == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1361 || new_edests == NULL || new_eclosures == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1362 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1363 dfa->nexts = new_nexts;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1364 dfa->org_indices = new_indices;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1365 dfa->edests = new_edests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1366 dfa->eclosures = new_eclosures;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1367 dfa->nodes_alloc = new_nodes_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1368 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1369 dfa->nodes[dfa->nodes_len] = token;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1370 dfa->nodes[dfa->nodes_len].constraint = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1371 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1372 dfa->nodes[dfa->nodes_len].accept_mb =
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1373 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1374 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1375 dfa->nexts[dfa->nodes_len] = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1376 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1377 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1378 return dfa->nodes_len++;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1379 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1380
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1381 static unsigned int inline
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1382 calc_state_hash (nodes, context)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1383 const re_node_set *nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1384 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1385 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1386 unsigned int hash = nodes->nelem + context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1387 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1388 for (i = 0 ; i < nodes->nelem ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1389 hash += nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1390 return hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1391 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1392
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1393 /* Search for the state whose node_set is equivalent to NODES.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1394 Return the pointer to the state, if we found it in the DFA.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1395 Otherwise create the new one and return it. In case of an error
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1396 return NULL and set the error code in ERR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1397 Note: - We assume NULL as the invalid state, then it is possible that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1398 return value is NULL and ERR is REG_NOERROR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1399 - We never return non-NULL value in case of any errors, it is for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1400 optimization. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1401
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1402 static re_dfastate_t*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1403 re_acquire_state (err, dfa, nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1404 reg_errcode_t *err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1405 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1406 const re_node_set *nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1407 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1408 unsigned int hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1409 re_dfastate_t *new_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1410 struct re_state_table_entry *spot;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1411 int i;
6073
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1412 #ifdef lint
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1413 /* Suppress bogus uninitialized-variable warnings. */
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1414 *err = REG_NOERROR;
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1415 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1416 if (BE (nodes->nelem == 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1417 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1418 *err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1419 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1420 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1421 hash = calc_state_hash (nodes, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1422 spot = dfa->state_table + (hash & dfa->state_hash_mask);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1423
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1424 for (i = 0 ; i < spot->num ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1425 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1426 re_dfastate_t *state = spot->array[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1427 if (hash != state->hash)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1428 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1429 if (re_node_set_compare (&state->nodes, nodes))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1430 return state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1431 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1432
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1433 /* There are no appropriate state in the dfa, create the new one. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1434 new_state = create_ci_newstate (dfa, nodes, hash);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1435 if (BE (new_state != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1436 return new_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1437 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1438 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1439 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1440 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1441 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1442 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1443
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1444 /* Search for the state whose node_set is equivalent to NODES and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1445 whose context is equivalent to CONTEXT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1446 Return the pointer to the state, if we found it in the DFA.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1447 Otherwise create the new one and return it. In case of an error
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1448 return NULL and set the error code in ERR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1449 Note: - We assume NULL as the invalid state, then it is possible that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1450 return value is NULL and ERR is REG_NOERROR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1451 - We never return non-NULL value in case of any errors, it is for
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1452 optimization. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1453
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1454 static re_dfastate_t*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1455 re_acquire_state_context (err, dfa, nodes, context)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1456 reg_errcode_t *err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1457 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1458 const re_node_set *nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1459 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1460 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1461 unsigned int hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1462 re_dfastate_t *new_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1463 struct re_state_table_entry *spot;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1464 int i;
6073
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1465 #ifdef lint
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1466 /* Suppress bogus uninitialized-variable warnings. */
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1467 *err = REG_NOERROR;
331a20f7531d (re_acquire_state, re_acquire_state_context) [defined lint]:
Paul Eggert <eggert@cs.ucla.edu>
parents: 6067
diff changeset
1468 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1469 if (nodes->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1470 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1471 *err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1472 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1473 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1474 hash = calc_state_hash (nodes, context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1475 spot = dfa->state_table + (hash & dfa->state_hash_mask);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1476
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1477 for (i = 0 ; i < spot->num ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1478 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1479 re_dfastate_t *state = spot->array[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1480 if (state->hash == hash
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1481 && state->context == context
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1482 && re_node_set_compare (state->entrance_nodes, nodes))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1483 return state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1484 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1485 /* There are no appropriate state in `dfa', create the new one. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1486 new_state = create_cd_newstate (dfa, nodes, context, hash);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1487 if (BE (new_state != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1488 return new_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1489 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1490 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1491 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1492 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1493 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1494 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1495
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1496 /* Finish initialization of the new state NEWSTATE, and using its hash value
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1497 HASH put in the appropriate bucket of DFA's state table. Return value
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1498 indicates the error code if failed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1499
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1500 static reg_errcode_t
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1501 register_state (dfa, newstate, hash)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1502 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1503 re_dfastate_t *newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1504 unsigned int hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1505 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1506 struct re_state_table_entry *spot;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1507 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1508 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1509
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1510 newstate->hash = hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1511 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1512 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1513 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1514 for (i = 0; i < newstate->nodes.nelem; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1515 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1516 int elem = newstate->nodes.elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1517 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1518 re_node_set_insert_last (&newstate->non_eps_nodes, elem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1519 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1520
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1521 spot = dfa->state_table + (hash & dfa->state_hash_mask);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1522 if (BE (spot->alloc <= spot->num, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1523 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1524 int new_alloc = 2 * spot->num + 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1525 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1526 new_alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1527 if (BE (new_array == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1528 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1529 spot->array = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1530 spot->alloc = new_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1531 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1532 spot->array[spot->num++] = newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1533 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1534 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1535
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1536 /* Create the new state which is independ of contexts.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1537 Return the new state if succeeded, otherwise return NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1538
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1539 static re_dfastate_t *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1540 create_ci_newstate (dfa, nodes, hash)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1541 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1542 const re_node_set *nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1543 unsigned int hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1544 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1545 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1546 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1547 re_dfastate_t *newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1548
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1549 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1550 if (BE (newstate == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1551 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1552 err = re_node_set_init_copy (&newstate->nodes, nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1553 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1554 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1555 re_free (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1556 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1557 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1558
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1559 newstate->entrance_nodes = &newstate->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1560 for (i = 0 ; i < nodes->nelem ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1561 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1562 re_token_t *node = dfa->nodes + nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1563 re_token_type_t type = node->type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1564 if (type == CHARACTER && !node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1565 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1566 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1567 newstate->accept_mb |= node->accept_mb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1568 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1569
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1570 /* If the state has the halt node, the state is a halt state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1571 if (type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1572 newstate->halt = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1573 else if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1574 newstate->has_backref = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1575 else if (type == ANCHOR || node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1576 newstate->has_constraint = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1577 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1578 err = register_state (dfa, newstate, hash);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1579 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1580 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1581 free_state (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1582 newstate = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1583 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1584 return newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1585 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1586
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1587 /* Create the new state which is depend on the context CONTEXT.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1588 Return the new state if succeeded, otherwise return NULL. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1589
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1590 static re_dfastate_t *
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1591 create_cd_newstate (dfa, nodes, context, hash)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1592 re_dfa_t *dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1593 const re_node_set *nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1594 unsigned int context, hash;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1595 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1596 int i, nctx_nodes = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1597 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1598 re_dfastate_t *newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1599
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1600 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1601 if (BE (newstate == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1602 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1603 err = re_node_set_init_copy (&newstate->nodes, nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1604 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1605 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1606 re_free (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1607 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1608 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1609
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1610 newstate->context = context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1611 newstate->entrance_nodes = &newstate->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1612
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1613 for (i = 0 ; i < nodes->nelem ; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1614 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1615 unsigned int constraint = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1616 re_token_t *node = dfa->nodes + nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1617 re_token_type_t type = node->type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1618 if (node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1619 constraint = node->constraint;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1621 if (type == CHARACTER && !constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1622 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1623 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1624 newstate->accept_mb |= node->accept_mb;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1625 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1626
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1627 /* If the state has the halt node, the state is a halt state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1628 if (type == END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1629 newstate->halt = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1630 else if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1631 newstate->has_backref = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1632 else if (type == ANCHOR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1633 constraint = node->opr.ctx_type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1634
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1635 if (constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1636 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1637 if (newstate->entrance_nodes == &newstate->nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1638 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1639 newstate->entrance_nodes = re_malloc (re_node_set, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1640 if (BE (newstate->entrance_nodes == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1641 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1642 free_state (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1643 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1644 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1645 re_node_set_init_copy (newstate->entrance_nodes, nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1646 nctx_nodes = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1647 newstate->has_constraint = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1648 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1649
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1650 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1651 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1652 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1653 ++nctx_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1654 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1655 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1656 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1657 err = register_state (dfa, newstate, hash);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1658 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1659 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1660 free_state (newstate);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1661 newstate = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1662 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1663 return newstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1664 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1665
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1666 static void
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1667 free_state (state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1668 re_dfastate_t *state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1669 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1670 re_node_set_free (&state->non_eps_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1671 re_node_set_free (&state->inveclosure);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1672 if (state->entrance_nodes != &state->nodes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1673 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1674 re_node_set_free (state->entrance_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1675 re_free (state->entrance_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1676 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1677 re_node_set_free (&state->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1678 re_free (state->word_trtable);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1679 re_free (state->trtable);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1680 re_free (state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1681 }