annotate lib/regexec.c @ 6173:0728607db20f

* lib/regexec.c (re_search_internal): Simplify update of rm_so and rm_eo by replacing "if (A == B) A += C - B;" with the equivalent of "if (A == B) A = C;". This will make the code more reliable once we port to 64-bit hosts.
* config/srclist.txt: Add glibc bug 1279.
author Paul Eggert <eggert@cs.ucla.edu>
date Wed, 31 Aug 2005 19:55:29 +0000
parents 5862ee08bfc1
children 6039b763ad3c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1 /* Extended regular expression matching and search library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3 This file is part of the GNU C Library.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
5
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
6 This program is free software; you can redistribute it and/or modify
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
8 the Free Software Foundation; either version 2, or (at your option)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
9 any later version.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
10
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
11 This program is distributed in the hope that it will be useful,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
14 GNU General Public License for more details.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
15
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
16 You should have received a copy of the GNU General Public License along
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
17 with this program; if not, write to the Free Software Foundation,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
19
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
21 int n) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
22 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
23 static void match_ctx_free (re_match_context_t *cache) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
25 int str_idx, int from, int to)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
26 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
27 static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
28 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
29 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
30 int str_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
31 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
32 int node, int str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
33 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
35 re_dfastate_t **limited_sts, int last_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
36 int last_str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
37 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
38 static reg_errcode_t re_search_internal (const regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
39 const char *string, int length,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
40 int start, int range, int stop,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
41 size_t nmatch, regmatch_t pmatch[],
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
42 int eflags) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
43 static int re_search_2_stub (struct re_pattern_buffer *bufp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
44 const char *string1, int length1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
45 const char *string2, int length2,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
46 int start, int range, struct re_registers *regs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
47 int stop, int ret_len) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
48 static int re_search_stub (struct re_pattern_buffer *bufp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
49 const char *string, int length, int start,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
50 int range, int stop, struct re_registers *regs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
51 int ret_len) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
52 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
53 int nregs, int regs_allocated) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
54 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
55 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
56 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
57 int *p_match_first)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
58 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
59 static int check_halt_state_context (const re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
60 const re_dfastate_t *state, int idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
61 internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
62 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
63 regmatch_t *prev_idx_match, int cur_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
64 int cur_idx, int nmatch) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
65 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
66 int str_idx, int dest_node, int nregs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
67 regmatch_t *regs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
68 re_node_set *eps_via_nodes) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
69 static reg_errcode_t set_regs (const regex_t *preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
70 const re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
71 size_t nmatch, regmatch_t *pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
72 int fl_backtrack) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
73 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
74
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
75 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
76 static int sift_states_iter_mb (const re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
77 re_sift_context_t *sctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
78 int node_idx, int str_idx, int max_str_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
79 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
80 static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
81 re_sift_context_t *sctx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
82 static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
83 re_sift_context_t *sctx, int str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
84 re_node_set *cur_dest) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
85 static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
86 re_sift_context_t *sctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
87 int str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
88 re_node_set *dest_nodes) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
89 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
90 re_node_set *dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
91 const re_node_set *candidates) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
92 static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
93 int dst_node, int dst_idx, int src_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
94 int src_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
95 static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
96 int boundaries, int subexp_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
97 int from_node, int bkref_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
98 static int check_dst_limits_calc_pos (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
99 int limit, int subexp_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
100 int node, int str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
101 int bkref_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
102 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
103 re_node_set *dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
104 const re_node_set *candidates,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
105 re_node_set *limits,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
106 struct re_backref_cache_entry *bkref_ents,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
107 int str_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
108 static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
109 re_sift_context_t *sctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
110 int str_idx, const re_node_set *candidates) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
111 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
112 re_dfastate_t **src, int num) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
113 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
114 re_match_context_t *mctx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
115 static re_dfastate_t *transit_state (reg_errcode_t *err,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
116 re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
117 re_dfastate_t *state) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
118 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
119 re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
120 re_dfastate_t *next_state) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
121 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
122 re_node_set *cur_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
123 int str_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
124 #if 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
125 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
126 re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
127 re_dfastate_t *pstate) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
128 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
129 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
130 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
131 re_dfastate_t *pstate) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
132 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
133 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
134 const re_node_set *nodes) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
135 static reg_errcode_t get_subexp (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
136 int bkref_node, int bkref_str_idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
137 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
138 const re_sub_match_top_t *sub_top,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
139 re_sub_match_last_t *sub_last,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
140 int bkref_node, int bkref_str) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
141 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
142 int subexp_idx, int type) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
143 static reg_errcode_t check_arrival (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
144 state_array_t *path, int top_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
145 int top_str, int last_node, int last_str,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
146 int type) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
147 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
148 int str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
149 re_node_set *cur_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
150 re_node_set *next_nodes) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
151 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
152 re_node_set *cur_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
153 int ex_subexp, int type) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
154 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
155 re_node_set *dst_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
156 int target, int ex_subexp,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
157 int type) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
158 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
159 re_node_set *cur_nodes, int cur_str,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
160 int subexp_num, int type) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
161 static int build_trtable (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
162 re_dfastate_t *state) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
163 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
164 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
165 const re_string_t *input, int idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
166 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
167 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
168 size_t name_len) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
169 # endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
170 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
171 static int group_nodes_into_DFAstates (re_dfa_t *dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
172 const re_dfastate_t *state,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
173 re_node_set *states_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
174 bitset *states_ch) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
175 static int check_node_accept (const re_match_context_t *mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
176 const re_token_t *node, int idx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
177 static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
178
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
179 /* Entry point for POSIX code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
180
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
181 /* regexec searches for a given pattern, specified by PREG, in the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
182 string STRING.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
183
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
184 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
185 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
186 least NMATCH elements, and we set them to the offsets of the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
187 corresponding matched substrings.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
188
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
189 EFLAGS specifies `execution flags' which affect matching: if
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
190 REG_NOTBOL is set, then ^ does not match at the beginning of the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
191 string; if REG_NOTEOL is set, then $ does not match at the end.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
192
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
193 We return 0 if we find a match and REG_NOMATCH if not. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
194
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
195 int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
196 regexec (const regex_t *__restrict preg, const char *__restrict string,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
197 size_t nmatch, regmatch_t pmatch[], int eflags)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
198 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
199 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
200 int start, length;
6079
221ad521f976 * config/srclist.txt: Add glibc bug 1225.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6076
diff changeset
201 #ifdef _LIBC
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
202 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
6079
221ad521f976 * config/srclist.txt: Add glibc bug 1225.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6076
diff changeset
203 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
204
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
205 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
206 return REG_BADPAT;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
207
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
208 if (eflags & REG_STARTEND)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
209 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
210 start = pmatch[0].rm_so;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
211 length = pmatch[0].rm_eo;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
212 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
213 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
214 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
215 start = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
216 length = strlen (string);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
217 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
218
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
219 __libc_lock_lock (dfa->lock);
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
220 if (preg->re_no_sub)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
221 err = re_search_internal (preg, string, length, start, length - start,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
222 length, 0, NULL, eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
223 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
224 err = re_search_internal (preg, string, length, start, length - start,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
225 length, nmatch, pmatch, eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
226 __libc_lock_unlock (dfa->lock);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
227 return err != REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
228 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
229
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
230 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
231 # include <shlib-compat.h>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
232 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
233
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
234 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
235 __typeof__ (__regexec) __compat_regexec;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
236
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
237 int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
238 attribute_compat_text_section
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
239 __compat_regexec (const regex_t *__restrict preg,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
240 const char *__restrict string, size_t nmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
241 regmatch_t pmatch[], int eflags)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
242 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
243 return regexec (preg, string, nmatch, pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
244 eflags & (REG_NOTBOL | REG_NOTEOL));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
245 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
246 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
247 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
248 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
249
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
250 /* Entry points for GNU code. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
251
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
252 /* re_match, re_search, re_match_2, re_search_2
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
253
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
254 The former two functions operate on STRING with length LENGTH,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
255 while the latter two operate on the concatenation of STRING1 and STRING2
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
256 with lengths LENGTH1 and LENGTH2, respectively.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
257
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
258 re_match() matches the compiled pattern in BUFP against the string,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
259 starting at index START.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
260
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
261 re_search() first tries matching at index START, then it tries to match
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
262 starting from index START + 1, and so on. The last start position tried
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
263 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
264 way as re_match().)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
265
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
266 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
267 the first STOP characters of the concatenation of the strings should be
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
268 considered.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
269
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
270 If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
271 and all groups are stored in REGS. (For the "_2" variants, the offsets are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
272 computed relative to the concatenation, not relative to the individual
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
273 strings.)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
274
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
275 On success, re_match* functions return the length of the match, re_search*
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
276 return the position of the start of the match. Return value -1 means no
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
277 match was found and -2 indicates an internal error. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
278
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
279 int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
280 re_match (struct re_pattern_buffer *bufp, const char *string,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
281 int length, int start, struct re_registers *regs)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
282 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
283 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
284 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
285 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
286 weak_alias (__re_match, re_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
287 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
288
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
289 int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
290 re_search (struct re_pattern_buffer *bufp, const char *string,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
291 int length, int start, int range, struct re_registers *regs)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
292 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
293 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
294 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
295 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
296 weak_alias (__re_search, re_search)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
297 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
298
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
299 int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
300 re_match_2 (struct re_pattern_buffer *bufp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
301 const char *string1, int length1,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
302 const char *string2, int length2,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
303 int start, struct re_registers *regs, int stop)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
304 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
305 return re_search_2_stub (bufp, string1, length1, string2, length2,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
306 start, 0, regs, stop, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
307 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
308 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
309 weak_alias (__re_match_2, re_match_2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
310 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
311
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
312 int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
313 re_search_2 (struct re_pattern_buffer *bufp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
314 const char *string1, int length1,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
315 const char *string2, int length2,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
316 int start, int range, struct re_registers *regs, int stop)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
317 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
318 return re_search_2_stub (bufp, string1, length1, string2, length2,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
319 start, range, regs, stop, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
320 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
321 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
322 weak_alias (__re_search_2, re_search_2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
323 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
324
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
325 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
326 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
327 re_search_2_stub (struct re_pattern_buffer *bufp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
328 const char *string1, int length1,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
329 const char *string2, int length2,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
330 int start, int range, struct re_registers *regs, int stop,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
331 int ret_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
332 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
333 const char *str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
334 int rval;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
335 int len = length1 + length2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
336 int free_str = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
337
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
338 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
339 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
340
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
341 /* Concatenate the strings. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
342 if (length2 > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
343 if (length1 > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
344 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
345 char *s = re_malloc (char, len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
346
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
347 if (BE (s == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
348 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
349 memcpy (s, string1, length1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
350 memcpy (s + length1, string2, length2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
351 str = s;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
352 free_str = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
353 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
354 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
355 str = string2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
356 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
357 str = string1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
358
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
359 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
360 ret_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
361 if (free_str)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
362 re_free ((char *) str);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
363 return rval;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
364 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
365
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
366 /* The parameters have the same meaning as those of re_search.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
367 Additional parameters:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
368 If RET_LEN is nonzero the length of the match is returned (re_match style);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
369 otherwise the position of the match is returned. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
370
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
371 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
372 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
373 re_search_stub (struct re_pattern_buffer *bufp,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
374 const char *string, int length,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
375 int start, int range, int stop, struct re_registers *regs,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
376 int ret_len)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
377 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
378 reg_errcode_t result;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
379 regmatch_t *pmatch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
380 int nregs, rval;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
381 int eflags = 0;
6079
221ad521f976 * config/srclist.txt: Add glibc bug 1225.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6076
diff changeset
382 #ifdef _LIBC
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
383 re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
6079
221ad521f976 * config/srclist.txt: Add glibc bug 1225.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6076
diff changeset
384 #endif
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
385
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
386 /* Check for out-of-range. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
387 if (BE (start < 0 || start > length, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
388 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
389 if (BE (start + range > length, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
390 range = length - start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
391 else if (BE (start + range < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
392 range = -start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
393
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
394 __libc_lock_lock (dfa->lock);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
395
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
396 eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
397 eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
398
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
399 /* Compile fastmap if we haven't yet. */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
400 if (range > 0 && bufp->re_fastmap != NULL && !bufp->re_fastmap_accurate)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
401 re_compile_fastmap (bufp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
402
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
403 if (BE (bufp->re_no_sub, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
404 regs = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
405
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
406 /* We need at least 1 register. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
407 if (regs == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
408 nregs = 1;
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
409 else if (BE (bufp->re_regs_allocated == REG_FIXED
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
410 && regs->rm_num_regs < bufp->re_nsub + 1, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
411 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
412 nregs = regs->rm_num_regs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
413 if (BE (nregs < 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
414 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
415 /* Nothing can be copied to regs. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
416 regs = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
417 nregs = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
418 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
419 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
420 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
421 nregs = bufp->re_nsub + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
422 pmatch = re_malloc (regmatch_t, nregs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
423 if (BE (pmatch == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
424 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
425 rval = -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
426 goto out;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
427 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
428
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
429 result = re_search_internal (bufp, string, length, start, range, stop,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
430 nregs, pmatch, eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
431
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
432 rval = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
433
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
434 /* I hope we needn't fill the regs with -1's when no match was found. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
435 if (result != REG_NOERROR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
436 rval = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
437 else if (regs != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
438 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
439 /* If caller wants register contents data back, copy them. */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
440 bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs,
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
441 bufp->re_regs_allocated);
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
442 if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
443 rval = -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
444 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
445
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
446 if (BE (rval == 0, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
447 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
448 if (ret_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
449 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
450 assert (pmatch[0].rm_so == start);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
451 rval = pmatch[0].rm_eo - start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
452 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
453 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
454 rval = pmatch[0].rm_so;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
455 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
456 re_free (pmatch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
457 out:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
458 __libc_lock_unlock (dfa->lock);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
459 return rval;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
460 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
461
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
462 static unsigned
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
463 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
464 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
465 int regs_allocated)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
466 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
467 int rval = REG_REALLOCATE;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
468 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
469 int need_regs = nregs + 1;
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
470 /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
471 uses. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
472
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
473 /* Have the register data arrays been allocated? */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
474 if (regs_allocated == REG_UNALLOCATED)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
475 { /* No. So allocate them with malloc. */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
476 regs->rm_start = re_malloc (regoff_t, need_regs);
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
477 regs->rm_end = re_malloc (regoff_t, need_regs);
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
478 if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0))
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
479 return REG_UNALLOCATED;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
480 regs->rm_num_regs = need_regs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
481 }
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
482 else if (regs_allocated == REG_REALLOCATE)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
483 { /* Yes. If we need more elements than were already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
484 allocated, reallocate them. If we need fewer, just
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
485 leave it alone. */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
486 if (BE (need_regs > regs->rm_num_regs, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
487 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
488 regoff_t *new_start =
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
489 re_realloc (regs->rm_start, regoff_t, need_regs);
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
490 regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
491 if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
492 return REG_UNALLOCATED;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
493 regs->rm_start = new_start;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
494 regs->rm_end = new_end;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
495 regs->rm_num_regs = need_regs;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
496 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
497 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
498 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
499 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
500 assert (regs_allocated == REG_FIXED);
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
501 /* This function may not be called with REG_FIXED and nregs too big. */
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
502 assert (regs->rm_num_regs >= nregs);
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
503 rval = REG_FIXED;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
504 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
505
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
506 /* Copy the regs. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
507 for (i = 0; i < nregs; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
508 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
509 regs->rm_start[i] = pmatch[i].rm_so;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
510 regs->rm_end[i] = pmatch[i].rm_eo;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
511 }
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
512 for ( ; i < regs->rm_num_regs; ++i)
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
513 regs->rm_start[i] = regs->rm_end[i] = -1;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
514
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
515 return rval;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
516 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
517
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
518 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
519 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
520 this memory for recording register information. STARTS and ENDS
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
521 must be allocated using the malloc library routine, and must each
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
522 be at least NUM_REGS * sizeof (regoff_t) bytes long.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
523
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
524 If NUM_REGS == 0, then subsequent matches should allocate their own
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
525 register data.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
526
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
527 Unless this function is called, the first search or match using
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
528 PATTERN_BUFFER will allocate its own register data, without
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
529 freeing the old data. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
530
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
531 void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
532 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
533 unsigned int num_regs, regoff_t *starts, regoff_t *ends)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
534 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
535 if (num_regs)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
536 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
537 bufp->re_regs_allocated = REG_REALLOCATE;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
538 regs->rm_num_regs = num_regs;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
539 regs->rm_start = starts;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
540 regs->rm_end = ends;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
541 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
542 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
543 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
544 bufp->re_regs_allocated = REG_UNALLOCATED;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
545 regs->rm_num_regs = 0;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
546 regs->rm_start = regs->rm_end = NULL;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
547 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
548 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
549 #ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
550 weak_alias (__re_set_registers, re_set_registers)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
551 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
552
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
553 /* Entry points compatible with 4.2 BSD regex library. We don't define
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
554 them unless specifically requested. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
555
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
556 #if defined _REGEX_RE_COMP || defined _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
557 int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
558 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
559 weak_function
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
560 # endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
561 re_exec (s)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
562 const char *s;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
563 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
564 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
565 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
566 #endif /* _REGEX_RE_COMP */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
567
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
568 /* Internal entry point. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
569
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
570 /* Searches for a compiled pattern PREG in the string STRING, whose
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
571 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
572 meanings with regexec. START and RANGE have the same meanings
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
573 with re_search.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
574 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
575 otherwise return the error code.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
576 Note: We assume front end functions already check ranges.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
577 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
578
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
579 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
580 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
581 re_search_internal (const regex_t *preg,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
582 const char *string, int length,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
583 int start, int range, int stop,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
584 size_t nmatch, regmatch_t pmatch[],
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
585 int eflags)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
586 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
587 reg_errcode_t err;
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
588 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
589 int left_lim, right_lim, incr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
590 int fl_longest_match, match_first, match_kind, match_last = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
591 int extra_nmatch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
592 int sb, ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
593 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
594 re_match_context_t mctx = { .dfa = dfa };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
595 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
596 re_match_context_t mctx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
597 #endif
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
598 char *fastmap = (preg->re_fastmap != NULL && preg->re_fastmap_accurate
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
599 && range && !preg->re_can_be_null) ? preg->re_fastmap : NULL;
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
600 unsigned REG_TRANSLATE_TYPE t =
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
601 (unsigned REG_TRANSLATE_TYPE) preg->re_translate;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
602
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
603 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
604 memset (&mctx, '\0', sizeof (re_match_context_t));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
605 mctx.dfa = dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
606 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
607
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
608 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
609 nmatch -= extra_nmatch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
610
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
611 /* Check whether the DFA has not been compiled. */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
612 if (BE (preg->re_used == 0 || dfa->init_state == NULL
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
613 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
614 || dfa->init_state_begbuf == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
615 return REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
616
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
617 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
618 /* We assume front-end functions already check them. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
619 assert (start + range >= 0 && start + range <= length);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
620 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
621
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
622 /* If initial states with non-begbuf contexts have no elements,
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
623 the regex must be anchored. If preg->re_newline_anchor is not set,
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
624 we'll never use init_state_nl, so do not check it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
625 if (dfa->init_state->nodes.nelem == 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
626 && dfa->init_state_word->nodes.nelem == 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
627 && (dfa->init_state_nl->nodes.nelem == 0
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
628 || !preg->re_newline_anchor))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
629 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
630 if (start != 0 && start + range != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
631 return REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
632 start = range = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
633 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
634
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
635 /* We must check the longest matching, if nmatch > 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
636 fl_longest_match = (nmatch != 0 || dfa->nbackref);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
637
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
638 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
639 preg->re_translate,
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
640 preg->re_syntax & REG_IGNORE_CASE, dfa);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
641 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
642 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
643 mctx.input.stop = stop;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
644 mctx.input.raw_stop = stop;
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
645 mctx.input.newline_anchor = preg->re_newline_anchor;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
646
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
647 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
648 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
649 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
650
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
651 /* We will log all the DFA states through which the dfa passes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
652 if nmatch > 1, or this dfa has "multibyte node", which is a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
653 back-reference or a node which can accept multibyte character or
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
654 multi character collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
655 if (nmatch > 1 || dfa->has_mb_node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
656 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
657 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
658 if (BE (mctx.state_log == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
659 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
660 err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
661 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
662 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
663 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
664 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
665 mctx.state_log = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
666
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
667 match_first = start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
668 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
669 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
670
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
671 /* Check incrementally whether or not the input string matches. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
672 incr = (range < 0) ? -1 : 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
673 left_lim = (range < 0) ? start + range : start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
674 right_lim = (range < 0) ? start : start + range;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
675 sb = dfa->mb_cur_max == 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
676 match_kind =
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
677 (fastmap
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
678 ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
679 | (range >= 0 ? 2 : 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
680 | (t != NULL ? 1 : 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
681 : 8);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
682
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
683 for (;; match_first += incr)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
684 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
685 err = REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
686 if (match_first < left_lim || right_lim < match_first)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
687 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
688
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
689 /* Advance as rapidly as possible through the string, until we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
690 find a plausible place to start matching. This may be done
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
691 with varying efficiency, so there are various possibilities:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
692 only the most common of them are specialized, in order to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
693 save on code size. We use a switch statement for speed. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
694 switch (match_kind)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
695 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
696 case 8:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
697 /* No fastmap. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
698 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
699
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
700 case 7:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
701 /* Fastmap with single-byte translation, match forward. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
702 while (BE (match_first < right_lim, 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
703 && !fastmap[t[(unsigned char) string[match_first]]])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
704 ++match_first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
705 goto forward_match_found_start_or_reached_end;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
706
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
707 case 6:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
708 /* Fastmap without translation, match forward. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
709 while (BE (match_first < right_lim, 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
710 && !fastmap[(unsigned char) string[match_first]])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
711 ++match_first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
712
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
713 forward_match_found_start_or_reached_end:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
714 if (BE (match_first == right_lim, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
715 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
716 ch = match_first >= length
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
717 ? 0 : (unsigned char) string[match_first];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
718 if (!fastmap[t ? t[ch] : ch])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
719 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
720 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
721 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
722
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
723 case 4:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
724 case 5:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
725 /* Fastmap without multi-byte translation, match backwards. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
726 while (match_first >= left_lim)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
727 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
728 ch = match_first >= length
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
729 ? 0 : (unsigned char) string[match_first];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
730 if (fastmap[t ? t[ch] : ch])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
731 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
732 --match_first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
733 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
734 if (match_first < left_lim)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
735 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
736 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
737
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
738 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
739 /* In this case, we can't determine easily the current byte,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
740 since it might be a component byte of a multibyte
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
741 character. Then we use the constructed buffer instead. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
742 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
743 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
744 /* If MATCH_FIRST is out of the valid range, reconstruct the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
745 buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
746 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
747 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
748 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
749 err = re_string_reconstruct (&mctx.input, match_first,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
750 eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
751 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
752 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
753
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
754 offset = match_first - mctx.input.raw_mbs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
755 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
756 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
757 Note that MATCH_FIRST must not be smaller than 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
758 ch = (match_first >= length
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
759 ? 0 : re_string_byte_at (&mctx.input, offset));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
760 if (fastmap[ch])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
761 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
762 match_first += incr;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
763 if (match_first < left_lim || match_first > right_lim)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
764 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
765 err = REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
766 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
767 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
768 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
769 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
770 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
771
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
772 /* Reconstruct the buffers so that the matcher can assume that
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
773 the matching starts from the beginning of the buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
774 err = re_string_reconstruct (&mctx.input, match_first, eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
775 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
776 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
777
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
778 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
779 /* Don't consider this char as a possible match start if it is part,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
780 yet isn't the head, of a multibyte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
781 if (!sb && !re_string_first_byte (&mctx.input, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
782 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
783 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
784
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
785 /* It seems to be an appropriate one, so use the matcher. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
786 /* We assume that the matching starts from 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
787 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
788 match_last = check_matching (&mctx, fl_longest_match,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
789 range >= 0 ? &match_first : NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
790 if (match_last != -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
791 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
792 if (BE (match_last == -2, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
793 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
794 err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
795 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
796 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
797 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
798 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
799 mctx.match_last = match_last;
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
800 if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
801 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
802 re_dfastate_t *pstate = mctx.state_log[match_last];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
803 mctx.last_node = check_halt_state_context (&mctx, pstate,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
804 match_last);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
805 }
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
806 if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
807 || dfa->nbackref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
808 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
809 err = prune_impossible_nodes (&mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
810 if (err == REG_NOERROR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
811 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
812 if (BE (err != REG_NOMATCH, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
813 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
814 match_last = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
815 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
816 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
817 break; /* We found a match. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
818 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
819 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
820
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
821 match_ctx_clean (&mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
822 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
823
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
824 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
825 assert (match_last != -1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
826 assert (err == REG_NOERROR);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
827 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
828
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
829 /* Set pmatch[] if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
830 if (nmatch > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
831 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
832 int reg_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
833
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
834 /* Initialize registers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
835 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
836 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
837
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
838 /* Set the points where matching start/end. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
839 pmatch[0].rm_so = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
840 pmatch[0].rm_eo = mctx.match_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
841
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
842 if (!preg->re_no_sub && nmatch > 1)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
843 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
844 err = set_regs (preg, &mctx, nmatch, pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
845 dfa->has_plural_match && dfa->nbackref > 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
846 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
847 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
848 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
849
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
850 /* At last, add the offset to the each registers, since we slided
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
851 the buffers so that we could assume that the matching starts
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
852 from 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
853 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
854 if (pmatch[reg_idx].rm_so != -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
855 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
856 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
857 if (BE (mctx.input.offsets_needed != 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
858 {
6173
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
859 pmatch[reg_idx].rm_so =
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
860 (pmatch[reg_idx].rm_so == mctx.input.valid_len
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
861 ? mctx.input.valid_raw_len
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
862 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
863 pmatch[reg_idx].rm_eo =
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
864 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
865 ? mctx.input.valid_raw_len
0728607db20f * lib/regexec.c (re_search_internal): Simplify update of
Paul Eggert <eggert@cs.ucla.edu>
parents: 6171
diff changeset
866 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
867 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
868 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
869 assert (mctx.input.offsets_needed == 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
870 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
871 pmatch[reg_idx].rm_so += match_first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
872 pmatch[reg_idx].rm_eo += match_first;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
873 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
874 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
875 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
876 pmatch[nmatch + reg_idx].rm_so = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
877 pmatch[nmatch + reg_idx].rm_eo = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
878 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
879
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
880 if (dfa->subexp_map)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
881 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
882 if (dfa->subexp_map[reg_idx] != reg_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
883 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
884 pmatch[reg_idx + 1].rm_so
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
885 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
886 pmatch[reg_idx + 1].rm_eo
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
887 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
888 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
889 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
890
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
891 free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
892 re_free (mctx.state_log);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
893 if (dfa->nbackref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
894 match_ctx_free (&mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
895 re_string_destruct (&mctx.input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
896 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
897 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
898
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Run sift_states_backward over the state log, starting from
   mctx->match_last / mctx->last_node, and on success install the
   sifted array as the new mctx->state_log.

   Returns REG_NOERROR on success; in that case mctx->state_log,
   mctx->last_node and mctx->match_last have been updated.
   Returns REG_ESPACE when one of the two allocations fails,
   REG_NOMATCH when match_last drops below zero while looking for an
   earlier halt state, and otherwise passes on the error reported by
   sift_states_backward or merge_state_array.  */
899 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
900 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
901 prune_impossible_nodes (re_match_context_t *mctx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
902 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
903 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
904 int halt_node, match_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
905 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
906 re_dfastate_t **sifted_states;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Starts as NULL so the cleanup at free_return can release it
   unconditionally, even when it was never allocated.  */
907 re_dfastate_t **lim_states = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
908 re_sift_context_t sctx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
909 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
910 assert (mctx->state_log != NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
911 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Work on local copies; mctx is only updated once sifting succeeds.  */
912 match_last = mctx->match_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
913 halt_node = mctx->last_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* One slot per index 0 .. match_last.  */
914 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
915 if (BE (sifted_states == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
916 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
917 ret = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
918 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
919 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* With back references a second array (lim_states) is used and the
   sift may be retried from earlier halt positions; without them a
   single sift pass is made (see the else branch).  */
920 if (dfa->nbackref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
921 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
922 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
923 if (BE (lim_states == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
924 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
925 ret = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
926 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
927 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
928 while (1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
929 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* lim_states is cleared again at the start of every attempt.  */
930 memset (lim_states, '\0',
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
931 sizeof (re_dfastate_t *) * (match_last + 1));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
932 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
933 match_last);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
934 ret = sift_states_backward (mctx, &sctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* sctx.limits is released before the result is examined, so the
   error path below does not have to free it.  */
935 re_node_set_free (&sctx.limits);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
936 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
937 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Stop retrying once either array holds a state at index 0.  */
938 if (sifted_states[0] != NULL || lim_states[0] != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
939 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Otherwise step match_last back to the nearest earlier index whose
   state_log entry is non-NULL and has halt set; fail with
   REG_NOMATCH when no such index remains.  */
940 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
941 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
942 --match_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
943 if (match_last < 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
944 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
945 ret = REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
946 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
947 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
948 } while (mctx->state_log[match_last] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
949 || !mctx->state_log[match_last]->halt);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Recompute the halt node for the new, earlier end position.  */
950 halt_node = check_halt_state_context (mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
951 mctx->state_log[match_last],
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
952 match_last);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
953 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
954 ret = merge_state_array (dfa, sifted_states, lim_states,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
955 match_last + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* lim_states is freed and reset to NULL here, so the second re_free
   at free_return receives NULL rather than the stale pointer.  */
956 re_free (lim_states);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
957 lim_states = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
958 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
959 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
960 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
961 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
962 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* No back references: lim_states is still NULL here and a single
   sift pass is made, with no retry.  */
963 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
964 ret = sift_states_backward (mctx, &sctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
965 re_node_set_free (&sctx.limits);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
966 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
967 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
968 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Success: the sifted array replaces the old state log.  The local
   pointer is set to NULL so the cleanup below does not free the
   array that mctx now refers to.  */
969 re_free (mctx->state_log);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
970 mctx->state_log = sifted_states;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
971 sifted_states = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
972 mctx->last_node = halt_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
973 mctx->match_last = match_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
974 ret = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Shared exit for the success path and every error path.  */
975 free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
976 re_free (sifted_states);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
977 re_free (lim_states);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
978 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
979 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
980
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
981 /* Acquire an initial state and return it.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
982 We must select appropriate initial state depending on the context,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
983 since initial states may have constraints like "\<", "^", etc. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
984
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Parameters:
     ERR   - forwarded to re_acquire_state_context on the BEGBUF-only
             path; not written on any other path in this function.
     MCTX  - supplies the DFA (mctx->dfa), the input string and eflags.
     IDX   - position whose preceding context (idx - 1) selects the
             initial state.
   Returns one of the DFA's initial states, or the result of
   re_acquire_state_context.  */
985 static inline re_dfastate_t *
6084
87075d885cc5 * config/srclist.txt: Add glibc bug 1231.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6082
diff changeset
986 __attribute ((always_inline)) internal_function
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
987 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
988 int idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
989 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
990 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Only an initial state that carries constraints needs a
   context-specific variant; otherwise dfa->init_state is returned
   unchanged (final else branch).  */
991 if (dfa->init_state->has_constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
992 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
993 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* The context is taken at idx - 1, the position just before IDX.  */
994 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* The tests below run in this order: the combined BEGBUF+NEWLINE
   case is checked before either flag on its own.  */
995 if (IS_WORD_CONTEXT (context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
996 return dfa->init_state_word;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
997 else if (IS_ORDINARY_CONTEXT (context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
998 return dfa->init_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
999 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1000 return dfa->init_state_begbuf;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1001 else if (IS_NEWLINE_CONTEXT (context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1002 return dfa->init_state_nl;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1003 else if (IS_BEGBUF_CONTEXT (context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1004 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1005 /* This case is relatively rare, so the state is computed on demand. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1006 return re_acquire_state_context (err, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1007 dfa->init_state->entrance_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1008 context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1009 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1010 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1011 /* Must not happen? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1012 return dfa->init_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1013 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1014 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1015 return dfa->init_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1016 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1017
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1018 /* Check whether the regular expression matches input string INPUT or not,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1019 and return the index where the matching ends; return -1 if there is no match,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1020 or return -2 in case of an error.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1021 FL_LONGEST_MATCH means we want the POSIX longest matching.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1022 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1023 next place where we may want to try matching.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1024 Note that the matcher assumes that the matching starts from the current
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1025 index of the buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1026
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1027 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1028 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1029 check_matching (re_match_context_t *mctx, int fl_longest_match,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1030 int *p_match_first)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1031 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1032 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1033 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1034 int match = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1035 int match_last = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1036 int cur_str_idx = re_string_cur_idx (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1037 re_dfastate_t *cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1038 int at_init_state = p_match_first != NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1039 int next_start_idx = cur_str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1040
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1041 err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1042 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1043 /* An initial state must not be NULL (invalid). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1044 if (BE (cur_state == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1045 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1046 assert (err == REG_ESPACE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1047 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1048 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1049
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1050 if (mctx->state_log != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1051 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1052 mctx->state_log[cur_str_idx] = cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1053
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1054 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1055 later. E.g. Processing back references. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1056 if (BE (dfa->nbackref, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1057 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1058 at_init_state = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1059 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1060 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1061 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1062
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1063 if (cur_state->has_backref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1064 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1065 err = transit_state_bkref (mctx, &cur_state->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1066 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1067 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1068 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1069 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1070 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1071
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1072 /* If the RE accepts NULL string. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1073 if (BE (cur_state->halt, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1074 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1075 if (!cur_state->has_constraint
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1076 || check_halt_state_context (mctx, cur_state, cur_str_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1077 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1078 if (!fl_longest_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1079 return cur_str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1080 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1081 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1082 match_last = cur_str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1083 match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1084 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1085 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1086 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1087
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1088 while (!re_string_eoi (&mctx->input))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1089 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1090 re_dfastate_t *old_state = cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1091 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1092
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1093 if (BE (next_char_idx >= mctx->input.bufs_len, 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1094 || (BE (next_char_idx >= mctx->input.valid_len, 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1095 && mctx->input.valid_len < mctx->input.len))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1096 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1097 err = extend_buffers (mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1098 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1099 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1100 assert (err == REG_ESPACE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1101 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1102 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1103 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1104
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1105 cur_state = transit_state (&err, mctx, cur_state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1106 if (mctx->state_log != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1107 cur_state = merge_state_with_log (&err, mctx, cur_state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1108
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1109 if (cur_state == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1110 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1111 /* Reached the invalid state or an error. Try to recover a valid
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1112 state using the state log, if available and if we have not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1113 already found a valid (even if not the longest) match. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1114 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1115 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1116
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1117 if (mctx->state_log == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1118 || (match && !fl_longest_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1119 || (cur_state = find_recover_state (&err, mctx)) == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1120 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1121 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1122
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1123 if (BE (at_init_state, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1124 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1125 if (old_state == cur_state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1126 next_start_idx = next_char_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1127 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1128 at_init_state = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1129 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1130
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1131 if (cur_state->halt)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1132 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1133 /* Reached a halt state.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1134 Check the halt state can satisfy the current context. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1135 if (!cur_state->has_constraint
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1136 || check_halt_state_context (mctx, cur_state,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1137 re_string_cur_idx (&mctx->input)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1138 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1139 /* We found an appropriate halt state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1140 match_last = re_string_cur_idx (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1141 match = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1142
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1143 /* We found a match, do not modify match_first below. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1144 p_match_first = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1145 if (!fl_longest_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1146 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1147 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1148 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1149 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1150
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1151 if (p_match_first)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1152 *p_match_first += next_start_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1153
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1154 return match_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1155 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1156
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1157 /* Check NODE match the current context. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1158
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1159 static int
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1160 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1161 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1162 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1163 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1164 unsigned int constraint = dfa->nodes[node].constraint;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1165 if (type != END_OF_RE)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1166 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1167 if (!constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1168 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1169 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1170 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1171 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1172 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1173
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1174 /* Check the halt state STATE match the current context.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1175 Return 0 if not match, if the node, STATE has, is a halt node and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1176 match the context, return the node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1177
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1178 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1179 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1180 check_halt_state_context (const re_match_context_t *mctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1181 const re_dfastate_t *state, int idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1182 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1183 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1184 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1185 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1186 assert (state->halt);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1187 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1188 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1189 for (i = 0; i < state->nodes.nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1190 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1191 return state->nodes.elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1192 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1193 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1194
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1195 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1196 corresponding to the DFA).
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1197 Return the destination node, and update EPS_VIA_NODES, return -1 in case
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1198 of errors. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1199
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1200 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1201 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1202 proceed_next_node (const re_match_context_t *mctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1203 int nregs, regmatch_t *regs, int *pidx, int node,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1204 re_node_set *eps_via_nodes, struct re_fail_stack_t *fs)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1205 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1206 re_dfa_t *const dfa = mctx->dfa;
6069
b3ee77788435 (proceed_next_node): Redo local variables to avoid GCC shadowing warnings.
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1207 int i, err;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1208 if (IS_EPSILON_NODE (dfa->nodes[node].type))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1209 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1210 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1211 re_node_set *edests = &dfa->edests[node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1212 int dest_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1213 err = re_node_set_insert (eps_via_nodes, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1214 if (BE (err < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1215 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1216 /* Pick up a valid destination, or return -1 if none is found. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1217 for (dest_node = -1, i = 0; i < edests->nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1218 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1219 int candidate = edests->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1220 if (!re_node_set_contains (cur_nodes, candidate))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1221 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1222 if (dest_node == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1223 dest_node = candidate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1224
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1225 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1226 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1227 /* In order to avoid infinite loop like "(a*)*", return the second
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1228 epsilon-transition if the first was already considered. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1229 if (re_node_set_contains (eps_via_nodes, dest_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1230 return candidate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1231
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1232 /* Otherwise, push the second epsilon-transition on the fail stack. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1233 else if (fs != NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1234 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1235 eps_via_nodes))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1236 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1237
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1238 /* We know we are going to exit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1239 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1240 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1241 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1242 return dest_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1243 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1244 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1245 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1246 int naccepted = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1247 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1248
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1249 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1250 if (dfa->nodes[node].accept_mb)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1251 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1252 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1253 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1254 if (type == OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1255 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1256 int subexp_idx = dfa->nodes[node].opr.idx + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1257 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1258 if (fs != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1259 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1260 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1261 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1262 else if (naccepted)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1263 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1264 char *buf = (char *) re_string_get_buffer (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1265 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1266 naccepted) != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1267 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1268 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1269 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1270
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1271 if (naccepted == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1272 {
6069
b3ee77788435 (proceed_next_node): Redo local variables to avoid GCC shadowing warnings.
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1273 int dest_node;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1274 err = re_node_set_insert (eps_via_nodes, node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1275 if (BE (err < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1276 return -2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1277 dest_node = dfa->edests[node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1278 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1279 dest_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1280 return dest_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1281 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1282 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1283
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1284 if (naccepted != 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1285 || check_node_accept (mctx, dfa->nodes + node, *pidx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1286 {
6069
b3ee77788435 (proceed_next_node): Redo local variables to avoid GCC shadowing warnings.
Paul Eggert <eggert@cs.ucla.edu>
parents: 5968
diff changeset
1287 int dest_node = dfa->nexts[node];
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1288 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1289 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1290 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1291 dest_node)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1292 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1293 re_node_set_empty (eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1294 return dest_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1295 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1296 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1297 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1298 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1299
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1300 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1301 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1302 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1303 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1304 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1305 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1306 int num = fs->num++;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1307 if (fs->num == fs->alloc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1308 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
1309 struct re_fail_stack_ent_t *new_array =
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
1310 re_realloc (fs->stack, struct re_fail_stack_ent_t, fs->alloc * 2);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1311 if (new_array == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1312 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1313 fs->alloc *= 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1314 fs->stack = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1315 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1316 fs->stack[num].idx = str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1317 fs->stack[num].node = dest_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1318 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1319 if (fs->stack[num].regs == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1320 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1321 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1322 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1323 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1324 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1325
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1326 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1327 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1328 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1329 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1330 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1331 int num = --fs->num;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1332 assert (num >= 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1333 *pidx = fs->stack[num].idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1334 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1335 re_node_set_free (eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1336 re_free (fs->stack[num].regs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1337 *eps_via_nodes = fs->stack[num].eps_via_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1338 return fs->stack[num].node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1339 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1340
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1341 /* Set the positions where the subexpressions are starts/ends to registers
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1342 PMATCH.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1343 Note: We assume that pmatch[0] is already set, and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1344 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1345
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1346 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1347 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1348 set_regs (const regex_t *preg, const re_match_context_t *mctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1349 size_t nmatch, regmatch_t *pmatch, int fl_backtrack)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1350 {
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
1351 re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1352 int idx, cur_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1353 re_node_set eps_via_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1354 struct re_fail_stack_t *fs;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1355 struct re_fail_stack_t fs_body = { 0, 2, NULL };
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1356 regmatch_t *prev_idx_match;
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1357 int prev_idx_match_malloced = 0;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1358
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1359 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1360 assert (nmatch > 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1361 assert (mctx->state_log != NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1362 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1363 if (fl_backtrack)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1364 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1365 fs = &fs_body;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1366 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1367 if (fs->stack == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1368 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1369 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1370 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1371 fs = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1372
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1373 cur_node = dfa->init_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1374 re_node_set_init_empty (&eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1375
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1376 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1377 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1378 else
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1379 {
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1380 prev_idx_match = re_malloc (regmatch_t, nmatch);
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1381 if (prev_idx_match == NULL)
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1382 {
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1383 free_fail_stack_return (fs);
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1384 return REG_ESPACE;
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1385 }
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1386 prev_idx_match_malloced = 1;
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1387 }
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1388 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1389
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1390 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1391 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1392 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1393
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1394 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1395 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1396 int reg_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1397 if (fs)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1398 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1399 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1400 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1401 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1402 if (reg_idx == nmatch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1403 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1404 re_node_set_free (&eps_via_nodes);
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1405 if (prev_idx_match_malloced)
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1406 re_free (prev_idx_match);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1407 return free_fail_stack_return (fs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1408 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1409 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1410 &eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1411 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1412 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1413 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1414 re_node_set_free (&eps_via_nodes);
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1415 if (prev_idx_match_malloced)
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1416 re_free (prev_idx_match);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1417 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1418 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1419 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1420
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1421 /* Proceed to next node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1422 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1423 &eps_via_nodes, fs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1424
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1425 if (BE (cur_node < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1426 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1427 if (BE (cur_node == -2, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1428 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1429 re_node_set_free (&eps_via_nodes);
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1430 if (prev_idx_match_malloced)
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1431 re_free (prev_idx_match);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1432 free_fail_stack_return (fs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1433 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1434 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1435 if (fs)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1436 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1437 &eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1438 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1439 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1440 re_node_set_free (&eps_via_nodes);
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1441 if (prev_idx_match_malloced)
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1442 re_free (prev_idx_match);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1443 return REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1444 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1445 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1446 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1447 re_node_set_free (&eps_via_nodes);
6125
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1448 if (prev_idx_match_malloced)
4af02335816a * config/srclist.txt: Add glibc bug 1245.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6119
diff changeset
1449 re_free (prev_idx_match);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1450 return free_fail_stack_return (fs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1451 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1452
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1453 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1454 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1455 free_fail_stack_return (struct re_fail_stack_t *fs)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1456 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1457 if (fs)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1458 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1459 int fs_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1460 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1461 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1462 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1463 re_free (fs->stack[fs_idx].regs);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1464 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1465 re_free (fs->stack);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1466 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1467 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1468 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1469
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1470 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1471 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1472 update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1473 int cur_node, int cur_idx, int nmatch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1474 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1475 int type = dfa->nodes[cur_node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1476 if (type == OP_OPEN_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1477 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1478 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1479
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1480 /* We are at the first node of this sub expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1481 if (reg_num < nmatch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1482 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1483 pmatch[reg_num].rm_so = cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1484 pmatch[reg_num].rm_eo = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1485 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1486 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1487 else if (type == OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1488 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1489 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1490 if (reg_num < nmatch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1491 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1492 /* We are at the last node of this sub expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1493 if (pmatch[reg_num].rm_so < cur_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1494 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1495 pmatch[reg_num].rm_eo = cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1496 /* This is a non-empty match or we are not inside an optional
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1497 subexpression. Accept this right away. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1498 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1499 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1500 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1501 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1502 if (dfa->nodes[cur_node].opt_subexp
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1503 && prev_idx_match[reg_num].rm_so != -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1504 /* We transited through an empty match for an optional
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1505 subexpression, like (a?)*, and this is not the subexp's
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1506 first match. Copy back the old content of the registers
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1507 so that matches of an inner subexpression are undone as
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1508 well, like in ((a?))*. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1509 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1510 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1511 /* We completed a subexpression, but it may be part of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1512 an optional one, so do not update PREV_IDX_MATCH. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1513 pmatch[reg_num].rm_eo = cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1514 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1515 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1516 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1517 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1518
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1519 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1520 and sift the nodes in each states according to the following rules.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1521 Updated state_log will be wrote to STATE_LOG.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1522
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1523 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1524 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1525 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1526 the LAST_NODE, we throw away the node `a'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1527 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1528 string `s' and transit to `b':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1529 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1530 away the node `a'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1531 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1532 thrown away, we throw away the node `a'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1533 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1534 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1535 node `a'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1536 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1537 we throw away the node `a'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1538
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1539 #define STATE_NODE_CONTAINS(state,node) \
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1540 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1541
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1542 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1543 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1544 sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1545 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1546 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1547 int null_cnt = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1548 int str_idx = sctx->last_str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1549 re_node_set cur_dest;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1550
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1551 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1552 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1553 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1554
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1555 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1556 transit to the last_node and the last_node itself. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1557 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1558 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1559 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1560 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1561 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1562 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1563
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1564 /* Then check each states in the state_log. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1565 while (str_idx > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1566 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1567 /* Update counters. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1568 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1569 if (null_cnt > mctx->max_mb_elem_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1570 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1571 memset (sctx->sifted_states, '\0',
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1572 sizeof (re_dfastate_t *) * str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1573 re_node_set_free (&cur_dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1574 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1575 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1576 re_node_set_empty (&cur_dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1577 --str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1578
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1579 if (mctx->state_log[str_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1580 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1581 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1582 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1583 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1584 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1585
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1586 /* Add all the nodes which satisfy the following conditions:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1587 - It can epsilon transit to a node in CUR_DEST.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1588 - It is in CUR_SRC.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1589 And update state_log. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1590 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1591 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1592 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1593 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1594 err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1595 free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1596 re_node_set_free (&cur_dest);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1597 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1598 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1599
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1600 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1601 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1602 build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1603 int str_idx, re_node_set *cur_dest)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1604 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1605 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1606 re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1607 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1608
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1609 /* Then build the next sifted state.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1610 We build the next sifted state on `cur_dest', and update
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1611 `sifted_states[str_idx]' with `cur_dest'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1612 Note:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1613 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1614 `cur_src' points the node_set of the old `state_log[str_idx]'
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1615 (with the epsilon nodes pre-filtered out). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1616 for (i = 0; i < cur_src->nelem; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1617 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1618 int prev_node = cur_src->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1619 int naccepted = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1620 int ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1621
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1622 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1623 re_token_type_t type = dfa->nodes[prev_node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1624 assert (!IS_EPSILON_NODE (type));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1625 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1626 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1627 /* If the node may accept `multi byte'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1628 if (dfa->nodes[prev_node].accept_mb)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1629 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1630 str_idx, sctx->last_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1631 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1632
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1633 /* We don't check backreferences here.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1634 See update_cur_sifted_state(). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1635 if (!naccepted
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1636 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1637 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1638 dfa->nexts[prev_node]))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1639 naccepted = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1640
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1641 if (naccepted == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1642 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1643
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1644 if (sctx->limits.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1645 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1646 int to_idx = str_idx + naccepted;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1647 if (check_dst_limits (mctx, &sctx->limits,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1648 dfa->nexts[prev_node], to_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1649 prev_node, str_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1650 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1651 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1652 ret = re_node_set_insert (cur_dest, prev_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1653 if (BE (ret == -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1654 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1655 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1656
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1657 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1658 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1659
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1660 /* Helper functions. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1661
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1662 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1663 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1664 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1665 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1666 int top = mctx->state_log_top;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1667
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1668 if (next_state_log_idx >= mctx->input.bufs_len
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1669 || (next_state_log_idx >= mctx->input.valid_len
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1670 && mctx->input.valid_len < mctx->input.len))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1671 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1672 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1673 err = extend_buffers (mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1674 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1675 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1676 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1677
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1678 if (top < next_state_log_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1679 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1680 memset (mctx->state_log + top + 1, '\0',
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1681 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1682 mctx->state_log_top = next_state_log_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1683 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1684 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1685 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1686
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1687 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1688 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1689 merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1690 int num)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1691 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1692 int st_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1693 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1694 for (st_idx = 0; st_idx < num; ++st_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1695 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1696 if (dst[st_idx] == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1697 dst[st_idx] = src[st_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1698 else if (src[st_idx] != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1699 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1700 re_node_set merged_set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1701 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1702 &src[st_idx]->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1703 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1704 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1705 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1706 re_node_set_free (&merged_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1707 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1708 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1709 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1710 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1711 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1712 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1713
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1714 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1715 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1716 update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1717 int str_idx, re_node_set *dest_nodes)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1718 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1719 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1720 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1721 const re_node_set *candidates;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1722 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1723 : &mctx->state_log[str_idx]->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1724
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1725 if (dest_nodes->nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1726 sctx->sifted_states[str_idx] = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1727 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1728 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1729 if (candidates)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1730 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1731 /* At first, add the nodes which can epsilon transit to a node in
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1732 DEST_NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1733 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1734 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1735 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1736
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1737 /* Then, check the limitations in the current sift_context. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1738 if (sctx->limits.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1739 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1740 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1741 mctx->bkref_ents, str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1742 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1743 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1744 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1745 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1746
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1747 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1748 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1749 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1750 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1751
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1752 if (candidates && mctx->state_log[str_idx]->has_backref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1753 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1754 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1755 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1756 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1757 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1758 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1759 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1760
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1761 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1762 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1763 add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1764 const re_node_set *candidates)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1765 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1766 reg_errcode_t err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1767 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1768
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1769 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1770 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1771 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1772
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1773 if (!state->inveclosure.alloc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1774 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1775 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1776 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1777 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1778 for (i = 0; i < dest_nodes->nelem; i++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1779 re_node_set_merge (&state->inveclosure,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1780 dfa->inveclosures + dest_nodes->elems[i]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1781 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1782 return re_node_set_add_intersect (dest_nodes, candidates,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1783 &state->inveclosure);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1784 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1785
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1786 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1787 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1788 sub_epsilon_src_nodes (re_dfa_t *dfa, int node, re_node_set *dest_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1789 const re_node_set *candidates)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1790 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1791 int ecl_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1792 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1793 re_node_set *inv_eclosure = dfa->inveclosures + node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1794 re_node_set except_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1795 re_node_set_init_empty (&except_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1796 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1797 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1798 int cur_node = inv_eclosure->elems[ecl_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1799 if (cur_node == node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1800 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1801 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1802 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1803 int edst1 = dfa->edests[cur_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1804 int edst2 = ((dfa->edests[cur_node].nelem > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1805 ? dfa->edests[cur_node].elems[1] : -1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1806 if ((!re_node_set_contains (inv_eclosure, edst1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1807 && re_node_set_contains (dest_nodes, edst1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1808 || (edst2 > 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1809 && !re_node_set_contains (inv_eclosure, edst2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1810 && re_node_set_contains (dest_nodes, edst2)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1811 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1812 err = re_node_set_add_intersect (&except_nodes, candidates,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1813 dfa->inveclosures + cur_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1814 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1815 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1816 re_node_set_free (&except_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1817 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1818 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1819 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1820 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1821 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1822 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1823 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1824 int cur_node = inv_eclosure->elems[ecl_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1825 if (!re_node_set_contains (&except_nodes, cur_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1826 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1827 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1828 re_node_set_remove_at (dest_nodes, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1829 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1830 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1831 re_node_set_free (&except_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1832 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1833 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1834
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1835 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1836 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1837 check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1838 int dst_node, int dst_idx, int src_node, int src_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1839 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1840 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1841 int lim_idx, src_pos, dst_pos;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1842
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1843 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1844 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1845 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1846 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1847 int subexp_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1848 struct re_backref_cache_entry *ent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1849 ent = mctx->bkref_ents + limits->elems[lim_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1850 subexp_idx = dfa->nodes[ent->node].opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1851
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1852 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1853 subexp_idx, dst_node, dst_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1854 dst_bkref_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1855 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1856 subexp_idx, src_node, src_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1857 src_bkref_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1858
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1859 /* In case of:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1860 <src> <dst> ( <subexp> )
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1861 ( <subexp> ) <src> <dst>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1862 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1863 if (src_pos == dst_pos)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1864 continue; /* This is unrelated limitation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1865 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1866 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1867 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1868 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1869 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1870
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1871 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1872 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1873 check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1874 int subexp_idx, int from_node, int bkref_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1875 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1876 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1877 re_node_set *eclosures = dfa->eclosures + from_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1878 int node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1879
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1880 /* Else, we are on the boundary: examine the nodes on the epsilon
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1881 closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1882 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1883 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1884 int node = eclosures->elems[node_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1885 switch (dfa->nodes[node].type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1886 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1887 case OP_BACK_REF:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1888 if (bkref_idx != -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1889 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1890 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1891 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1892 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1893 int dst, cpos;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1894
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1895 if (ent->node != node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1896 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1897
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
1898 if (subexp_idx
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
1899 < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
1900 && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1901 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1902
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1903 /* Recurse trying to reach the OP_OPEN_SUBEXP and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1904 OP_CLOSE_SUBEXP cases below. But, if the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1905 destination node is the same node as the source
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1906 node, don't recurse because it would cause an
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1907 infinite loop: a regex that exhibits this behavior
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1908 is ()\1*\1* */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1909 dst = dfa->edests[node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1910 if (dst == from_node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1911 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1912 if (boundaries & 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1913 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1914 else /* if (boundaries & 2) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1915 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1916 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1917
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1918 cpos =
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1919 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1920 dst, bkref_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1921 if (cpos == -1 /* && (boundaries & 1) */)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1922 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1923 if (cpos == 0 && (boundaries & 2))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1924 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1925
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
1926 if (subexp_idx
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
1927 < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
1928 ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1929 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1930 while (ent++->more);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1931 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1932 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1933
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1934 case OP_OPEN_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1935 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1936 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1937 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1938
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1939 case OP_CLOSE_SUBEXP:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1940 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1941 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1942 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1943
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1944 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1945 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1946 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1947 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1948
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1949 return (boundaries & 2) ? 1 : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1950 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1951
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1952 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1953 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1954 check_dst_limits_calc_pos (re_match_context_t *mctx, int limit, int subexp_idx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1955 int from_node, int str_idx, int bkref_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1956 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1957 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1958 int boundaries;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1959
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1960 /* If we are outside the range of the subexpression, return -1 or 1. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1961 if (str_idx < lim->subexp_from)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1962 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1963
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1964 if (lim->subexp_to < str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1965 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1966
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1967 /* If we are within the subexpression, return 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1968 boundaries = (str_idx == lim->subexp_from);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1969 boundaries |= (str_idx == lim->subexp_to) << 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1970 if (boundaries == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1971 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1972
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1973 /* Else, examine epsilon closure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1974 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1975 from_node, bkref_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1976 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1977
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1978 /* Check the limitations of sub expressions LIMITS, and remove the nodes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1979 which are against limitations from DEST_NODES. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1980
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1981 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1982 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1983 check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1984 const re_node_set *candidates, re_node_set *limits,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
1985 struct re_backref_cache_entry *bkref_ents, int str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1986 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1987 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1988 int node_idx, lim_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1989
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1990 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1991 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1992 int subexp_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1993 struct re_backref_cache_entry *ent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1994 ent = bkref_ents + limits->elems[lim_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1995
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1996 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1997 continue; /* This is an unrelated limitation. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1998
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
1999 subexp_idx = dfa->nodes[ent->node].opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2000 if (ent->subexp_to == str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2001 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2002 int ops_node = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2003 int cls_node = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2004 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2005 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2006 int node = dest_nodes->elems[node_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2007 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2008 if (type == OP_OPEN_SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2009 && subexp_idx == dfa->nodes[node].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2010 ops_node = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2011 else if (type == OP_CLOSE_SUBEXP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2012 && subexp_idx == dfa->nodes[node].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2013 cls_node = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2014 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2015
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2016 /* Check the limitation of the open subexpression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2017 /* Note that here ent->subexp_to == str_idx and str_idx != ent->subexp_from. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2018 if (ops_node >= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2019 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2020 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2021 candidates);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2022 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2023 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2024 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2025
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2026 /* Check the limitation of the close subexpression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2027 if (cls_node >= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2028 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2029 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2030 int node = dest_nodes->elems[node_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2031 if (!re_node_set_contains (dfa->inveclosures + node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2032 cls_node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2033 && !re_node_set_contains (dfa->eclosures + node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2034 cls_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2035 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2036 /* It is against this limitation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2037 Remove it from the current sifted state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2038 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2039 candidates);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2040 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2041 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2042 --node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2043 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2044 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2045 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2046 else /* (ent->subexp_to != str_idx) */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2047 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2048 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2049 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2050 int node = dest_nodes->elems[node_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2051 re_token_type_t type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2052 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2053 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2054 if (subexp_idx != dfa->nodes[node].opr.idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2055 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2056 /* It is against this limitation.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2057 Remove it from the current sifted state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2058 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2059 candidates);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2060 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2061 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2062 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2063 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2064 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2065 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2066 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2067 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2068
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2069 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2070 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2071 sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2072 int str_idx, const re_node_set *candidates)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2073 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2074 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2075 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2076 int node_idx, node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2077 re_sift_context_t local_sctx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2078 int first_idx = search_cur_bkref_entry (mctx, str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2079
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2080 if (first_idx == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2081 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2082
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2083 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2084
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2085 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2086 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2087 int enabled_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2088 re_token_type_t type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2089 struct re_backref_cache_entry *entry;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2090 node = candidates->elems[node_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2091 type = dfa->nodes[node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2092 /* Avoid infinite loop for the REs like "()\1+". */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2093 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2094 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2095 if (type != OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2096 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2097
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2098 entry = mctx->bkref_ents + first_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2099 enabled_idx = first_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2100 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2101 {
6082
f7e7f62a1516 * config/srclist.txt: Add glibc bug 1227.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6079
diff changeset
2102 int subexp_len, to_idx, dst_node, ret;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2103 re_dfastate_t *cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2104
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2105 if (entry->node != node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2106 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2107 subexp_len = entry->subexp_to - entry->subexp_from;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2108 to_idx = str_idx + subexp_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2109 dst_node = (subexp_len ? dfa->nexts[node]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2110 : dfa->edests[node].elems[0]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2111
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2112 if (to_idx > sctx->last_str_idx
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2113 || sctx->sifted_states[to_idx] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2114 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2115 || check_dst_limits (mctx, &sctx->limits, node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2116 str_idx, dst_node, to_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2117 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2118
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2119 if (local_sctx.sifted_states == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2120 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2121 local_sctx = *sctx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2122 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2123 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2124 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2125 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2126 local_sctx.last_node = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2127 local_sctx.last_str_idx = str_idx;
6082
f7e7f62a1516 * config/srclist.txt: Add glibc bug 1227.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6079
diff changeset
2128 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
f7e7f62a1516 * config/srclist.txt: Add glibc bug 1227.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6079
diff changeset
2129 if (BE (ret < 0, 0))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2130 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2131 err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2132 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2133 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2134 cur_state = local_sctx.sifted_states[str_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2135 err = sift_states_backward (mctx, &local_sctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2136 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2137 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2138 if (sctx->limited_states != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2139 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2140 err = merge_state_array (dfa, sctx->limited_states,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2141 local_sctx.sifted_states,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2142 str_idx + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2143 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2144 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2145 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2146 local_sctx.sifted_states[str_idx] = cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2147 re_node_set_remove (&local_sctx.limits, enabled_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2148
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2149 /* mctx->bkref_ents may have changed, reload the pointer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2150 entry = mctx->bkref_ents + enabled_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2151 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2152 while (enabled_idx++, entry++->more);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2153 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2154 err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2155 free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2156 if (local_sctx.sifted_states != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2157 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2158 re_node_set_free (&local_sctx.limits);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2159 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2160
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2161 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2162 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2163
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2164
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2165 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Decide how many input bytes node NODE_IDX may consume at string
   position STR_IDX.

   MCTX supplies the DFA (mctx->dfa) and the input string (mctx->input).
   SCTX supplies the sifted_states array, which is read at index
   STR_IDX + naccepted.
   MAX_STR_IDX bounds the positions at which sifted_states is consulted.

   Returns the byte count reported by check_node_accept_bytes, except that
   0 is returned instead when that count is positive, STR_IDX + count does
   not exceed MAX_STR_IDX, and sifted_states[STR_IDX + count] does not
   contain the node's successor dfa->nexts[NODE_IDX].
   NOTE(review): when STR_IDX + count is greater than MAX_STR_IDX the count
   is returned unchanged -- confirm that callers expect this. */
2166 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2167 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2168 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2169 int node_idx, int str_idx, int max_str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2170 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2171 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2172 int naccepted;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2173 /* Check the node can accept `multi byte'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2174 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2175 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2176 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2177 dfa->nexts[node_idx]))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2178 /* The node can't accept the `multi byte', or the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2179 destination was already thrown away, then the node
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2180 couldn't accept the current input `multi byte'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2181 naccepted = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2182 /* Otherwise, it is sure that the node could accept
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2183 `naccepted' bytes input. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2184 return naccepted;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2185 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2186 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2187
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2188
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2189 /* Functions for state transition. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2190
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2191 /* Return the next state to which the current state STATE will transit by
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2192 accepting the current input byte, and update STATE_LOG if necessary.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2193 If STATE can accept a multibyte char/collating element/back reference,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2194 update the destination of STATE_LOG. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2195
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Look up the successor of STATE for the next input byte of MCTX->input.

   ERR is written in two situations: it receives the result of
   transit_state_mb when RE_ENABLE_I18N is defined and STATE->accept_mb is
   set, and it receives REG_ESPACE when build_trtable reports failure.
   MCTX supplies the input string, the DFA and the eflags used to compute
   the word context.
   STATE is the current state; its trtable or word_trtable is consulted,
   and build_trtable is called on it when neither table exists yet.

   Returns the transition-table entry selected by the fetched byte, or NULL
   on either of the error paths described above. */
2196 static re_dfastate_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2197 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2198 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2199 re_dfastate_t *state)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2200 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2201 re_dfastate_t **trtable;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2202 unsigned char ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2203
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2204 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2205 /* If the current state can accept multibyte. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2206 if (BE (state->accept_mb, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2207 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2208 *err = transit_state_mb (mctx, state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2209 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2210 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2211 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2212 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2213
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2214 /* Then decide the next state with the single byte. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* The block guarded by "#if 0" below is compiled out; the table-driven
   path that follows it is the only one that runs. */
2215 #if 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2216 if (0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2217 /* don't use transition table */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2218 return transit_state_sb (err, mctx, state);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2219 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2220
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2221 /* Use transition table */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* NOTE(review): the context lookup below uses the current index minus 1,
   which suggests re_string_fetch_byte advances the input position --
   confirm against its definition. */
2222 ch = re_string_fetch_byte (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2223 for (;;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2224 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2225 trtable = state->trtable;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2226 if (BE (trtable != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2227 return trtable[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2228
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* No plain table: try the word-context table, which is indexed by CH for
   a non-word context and by CH + SBC_MAX for a word context. */
2229 trtable = state->word_trtable;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2230 if (BE (trtable != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2231 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2232 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2233 context
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2234 = re_string_context_at (&mctx->input,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2235 re_string_cur_idx (&mctx->input) - 1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2236 mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2237 if (IS_WORD_CONTEXT (context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2238 return trtable[ch + SBC_MAX];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2239 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2240 return trtable[ch];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2241 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2242
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
/* Neither table exists yet: build one.  A zero return from build_trtable
   is reported to the caller as REG_ESPACE. */
2243 if (!build_trtable (mctx->dfa, state))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2244 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2245 *err = REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2246 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2247 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2248
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2249 /* Retry, we now have a transition table. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2250 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2251 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2252
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2253 /* Update the state_log if necessary. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2254 re_dfastate_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2255 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2256 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2257 re_dfastate_t *next_state)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2258 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2259 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2260 int cur_idx = re_string_cur_idx (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2261
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2262 if (cur_idx > mctx->state_log_top)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2263 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2264 mctx->state_log[cur_idx] = next_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2265 mctx->state_log_top = cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2266 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2267 else if (mctx->state_log[cur_idx] == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2268 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2269 mctx->state_log[cur_idx] = next_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2270 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2271 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2272 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2273 re_dfastate_t *pstate;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2274 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2275 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2276 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2277 the destination of a multibyte char/collating element/
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2278 back reference. Then the next state is the union set of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2279 these destinations and the results of the transition table. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2280 pstate = mctx->state_log[cur_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2281 log_nodes = pstate->entrance_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2282 if (next_state != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2283 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2284 table_nodes = next_state->entrance_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2285 *err = re_node_set_init_union (&next_nodes, table_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2286 log_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2287 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2288 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2289 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2290 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2291 next_nodes = *log_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2292 /* Note: We already add the nodes of the initial state,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2293 then we don't need to add them here. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2294
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2295 context = re_string_context_at (&mctx->input,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2296 re_string_cur_idx (&mctx->input) - 1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2297 mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2298 next_state = mctx->state_log[cur_idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2299 = re_acquire_state_context (err, dfa, &next_nodes, context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2300 /* We don't need to check errors here, since the return value of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2301 this function is next_state and ERR is already set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2302
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2303 if (table_nodes != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2304 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2305 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2306
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2307 if (BE (dfa->nbackref, 0) && next_state != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2308 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2309 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2310 later. We must check them here, since the back references in the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2311 next state might use them. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2312 *err = check_subexp_matching_top (mctx, &next_state->nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2313 cur_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2314 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2315 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2316
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2317 /* If the next state has back references. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2318 if (next_state->has_backref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2319 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2320 *err = transit_state_bkref (mctx, &next_state->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2321 if (BE (*err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2322 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2323 next_state = mctx->state_log[cur_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2324 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2325 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2326
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2327 return next_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2328 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2329
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2330 /* Skip bytes in the input that correspond to part of a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2331 multi-byte match, then look in the log for a state
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2332 from which to restart matching. */
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2333 static re_dfastate_t *
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2334 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2335 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2336 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2337 re_dfastate_t *cur_state = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2338 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2339 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2340 int max = mctx->state_log_top;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2341 int cur_str_idx = re_string_cur_idx (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2342
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2343 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2344 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2345 if (++cur_str_idx > max)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2346 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2347 re_string_skip_bytes (&mctx->input, 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2348 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2349 while (mctx->state_log[cur_str_idx] == NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2350
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2351 cur_state = merge_state_with_log (err, mctx, NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2352 }
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
2353 while (*err == REG_NOERROR && cur_state == NULL);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2354 return cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2355 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2356
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2357 /* Helper functions for transit_state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2358
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2359 /* From the node set CUR_NODES, pick up the nodes whose types are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2360 OP_OPEN_SUBEXP and which have corresponding back references in the regular
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2361 expression. And register them to use them later for evaluating the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2362 correspoding back references. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2363
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2364 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2365 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2366 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2367 int str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2368 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2369 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2370 int node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2371 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2372
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2373 /* TODO: This isn't efficient.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2374 Because there might be more than one nodes whose types are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2375 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2376 nodes.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2377 E.g. RE: (a){2} */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2378 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2379 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2380 int node = cur_nodes->elems[node_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2381 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
2382 && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
2383 && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2384 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2385 err = match_ctx_add_subtop (mctx, node, str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2386 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2387 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2388 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2389 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2390 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2391 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2392
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
#if 0
/* Return the next state to which the current state STATE will transit by
   accepting the current input byte.

   The destination node set is the union of the epsilon closures of the
   successors of every node in STATE for which check_node_accept succeeds
   at the current string index.  The input is advanced by one byte before
   a state is returned.  On allocation or merge failure, *ERR is set and
   NULL is returned without advancing the input.  (This code is disabled.)  */

static re_dfastate_t *
transit_state_sb (err, mctx, state)
     reg_errcode_t *err;
     re_match_context_t *mctx;
     re_dfastate_t *state;
{
  re_dfa_t *const dfa = mctx->dfa;
  int str_idx = re_string_cur_idx (&mctx->input);
  re_node_set dests;
  re_dfastate_t *result;
  unsigned int context;
  int i;

  *err = re_node_set_alloc (&dests, state->nodes.nelem + 1);
  if (BE (*err != REG_NOERROR, 0))
    return NULL;

  for (i = 0; i < state->nodes.nelem; ++i)
    {
      int node = state->nodes.elems[i];

      if (!check_node_accept (mctx, dfa->nodes + node, str_idx))
        continue;

      *err = re_node_set_merge (&dests, dfa->eclosures + dfa->nexts[node]);
      if (BE (*err != REG_NOERROR, 0))
        {
          re_node_set_free (&dests);
          return NULL;
        }
    }

  context = re_string_context_at (&mctx->input, str_idx, mctx->eflags);

  /* We don't need to check errors here, since the return value of
     this function is the acquired state and ERR is already set.  */
  result = re_acquire_state_context (err, dfa, &dests, context);

  re_node_set_free (&dests);
  re_string_skip_bytes (&mctx->input, 1);
  return result;
}
#endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2436
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
#ifdef RE_ENABLE_I18N
/* Handle the nodes of PSTATE that are flagged accept_mb.  For each such
   node whose constraint (if any) is satisfied at the current input
   position, check_node_accept_bytes reports how many bytes the node
   accepts there.  When that count is nonzero, the epsilon closure of the
   node's successor is merged into the state logged at the position that
   many bytes ahead, and the resulting state is stored back into
   MCTX->state_log.  MCTX->max_mb_elem_len is raised to the largest byte
   count seen.

   Return REG_NOERROR on success, otherwise the first error encountered.  */
static reg_errcode_t
internal_function
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  re_dfa_t *const dfa = mctx->dfa;
  int n;

  for (n = 0; n < pstate->nodes.nelem; ++n)
    {
      int node = pstate->nodes.elems[n];
      int nbytes, target;
      unsigned int target_ctx;
      re_node_set merged, *closure;
      re_dfastate_t *logged;
      reg_errcode_t err;

      if (!dfa->nodes[node].accept_mb)
        continue;

      if (dfa->nodes[node].constraint)
        {
          unsigned int here_ctx
            = re_string_context_at (&mctx->input,
                                    re_string_cur_idx (&mctx->input),
                                    mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint,
                                           here_ctx))
            continue;
        }

      /* How many bytes can the node accept?  */
      nbytes = check_node_accept_bytes (dfa, node, &mctx->input,
                                        re_string_cur_idx (&mctx->input));
      if (nbytes == 0)
        continue;

      /* The node accepts `nbytes' bytes, so the transition lands that
         far ahead of the current position.  */
      target = re_string_cur_idx (&mctx->input) + nbytes;
      if (mctx->max_mb_elem_len < nbytes)
        mctx->max_mb_elem_len = nbytes;

      err = clean_state_log_if_needed (mctx, target);
      if (BE (err != REG_NOERROR, 0))
        return err;
#ifdef DEBUG
      assert (dfa->nexts[node] != -1);
#endif
      closure = dfa->eclosures + dfa->nexts[node];

      /* With nothing logged at the target yet, MERGED is a plain struct
         copy of the closure; otherwise it is a freshly built union that
         must be released below.  */
      logged = mctx->state_log[target];
      if (logged != NULL)
        {
          err = re_node_set_init_union (&merged, logged->entrance_nodes,
                                        closure);
          if (BE (err != REG_NOERROR, 0))
            return err;
        }
      else
        merged = *closure;

      target_ctx = re_string_context_at (&mctx->input, target - 1,
                                         mctx->eflags);
      mctx->state_log[target]
        = re_acquire_state_context (&err, dfa, &merged, target_ctx);

      /* Free MERGED only when re_node_set_init_union built it.  */
      if (logged != NULL)
        re_node_set_free (&merged);

      if (BE (mctx->state_log[target] == NULL && err != REG_NOERROR, 0))
        return err;
    }

  return REG_NOERROR;
}
#endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2506
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2507 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2508 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2509 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2510 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2511 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2512 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2513 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2514 int cur_str_idx = re_string_cur_idx (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2515
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2516 for (i = 0; i < nodes->nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2517 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2518 int dest_str_idx, prev_nelem, bkc_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2519 int node_idx = nodes->elems[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2520 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2521 const re_token_t *node = dfa->nodes + node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2522 re_node_set *new_dest_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2523
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2524 /* Check whether `node' is a backreference or not. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2525 if (node->type != OP_BACK_REF)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2526 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2527
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2528 if (node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2529 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2530 context = re_string_context_at (&mctx->input, cur_str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2531 mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2532 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2533 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2534 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2535
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2536 /* `node' is a backreference.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2537 Check the substring which the substring matched. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2538 bkc_idx = mctx->nbkref_ents;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2539 err = get_subexp (mctx, node_idx, cur_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2540 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2541 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2542
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2543 /* And add the epsilon closures (which is `new_dest_nodes') of
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2544 the backreference to appropriate state_log. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2545 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2546 assert (dfa->nexts[node_idx] != -1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2547 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2548 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2549 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2550 int subexp_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2551 re_dfastate_t *dest_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2552 struct re_backref_cache_entry *bkref_ent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2553 bkref_ent = mctx->bkref_ents + bkc_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2554 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2555 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2556 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2557 new_dest_nodes = (subexp_len == 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2558 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2559 : dfa->eclosures + dfa->nexts[node_idx]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2560 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2561 - bkref_ent->subexp_from);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2562 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2563 mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2564 dest_state = mctx->state_log[dest_str_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2565 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2566 : mctx->state_log[cur_str_idx]->nodes.nelem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2567 /* Add `new_dest_node' to state_log. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2568 if (dest_state == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2569 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2570 mctx->state_log[dest_str_idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2571 = re_acquire_state_context (&err, dfa, new_dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2572 context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2573 if (BE (mctx->state_log[dest_str_idx] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2574 && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2575 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2576 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2577 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2578 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2579 re_node_set dest_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2580 err = re_node_set_init_union (&dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2581 dest_state->entrance_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2582 new_dest_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2583 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2584 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2585 re_node_set_free (&dest_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2586 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2587 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2588 mctx->state_log[dest_str_idx]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2589 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2590 re_node_set_free (&dest_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2591 if (BE (mctx->state_log[dest_str_idx] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2592 && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2593 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2594 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2595 /* We need to check recursively if the backreference can epsilon
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2596 transit. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2597 if (subexp_len == 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2598 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2599 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2600 err = check_subexp_matching_top (mctx, new_dest_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2601 cur_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2602 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2603 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2604 err = transit_state_bkref (mctx, new_dest_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2605 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2606 goto free_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2607 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2608 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2609 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2610 err = REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2611 free_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2612 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2613 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2614
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2615 /* Enumerate all the candidates which the backreference BKREF_NODE can match
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2616 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2617 Note that we might collect inappropriate candidates here.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2618 However, the cost of checking them strictly here is too high, so we
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2619 delay these checks until prune_impossible_nodes(). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2621 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2622 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2623 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2624 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2625 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2626 int subexp_num, sub_top_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2627 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2628 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2629 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2630 if (cache_idx != -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2631 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2632 const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2633 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2634 if (entry->node == bkref_node)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2635 return REG_NOERROR; /* We already checked it. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2636 while (entry++->more);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2637 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2638
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2639 subexp_num = dfa->nodes[bkref_node].opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2640
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2641 /* For each sub expression */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2642 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2643 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2644 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2645 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2646 re_sub_match_last_t *sub_last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2647 int sub_last_idx, sl_str, bkref_str_off;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2648
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2649 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2650 continue; /* It isn't related. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2651
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2652 sl_str = sub_top->str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2653 bkref_str_off = bkref_str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2654 /* At first, check the last node of sub expressions we already
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2655 evaluated. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2656 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2657 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2658 int sl_str_diff;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2659 sub_last = sub_top->lasts[sub_last_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2660 sl_str_diff = sub_last->str_idx - sl_str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2661 /* The matched string by the sub expression match with the substring
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2662 at the back reference? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2663 if (sl_str_diff > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2664 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2665 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2666 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2667 /* Not enough chars for a successful match. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2668 if (bkref_str_off + sl_str_diff > mctx->input.len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2669 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2670
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2671 err = clean_state_log_if_needed (mctx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2672 bkref_str_off
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2673 + sl_str_diff);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2674 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2675 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2676 buf = (const char *) re_string_get_buffer (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2677 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2678 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2679 break; /* We don't need to search this sub expression any more. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2680 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2681 bkref_str_off += sl_str_diff;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2682 sl_str += sl_str_diff;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2683 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2684 bkref_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2685
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2686 /* Reload buf, since the preceding call might have reallocated
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2687 the buffer. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2688 buf = (const char *) re_string_get_buffer (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2689
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2690 if (err == REG_NOMATCH)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2691 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2692 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2693 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2694 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2695
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2696 if (sub_last_idx < sub_top->nlasts)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2697 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2698 if (sub_last_idx > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2699 ++sl_str;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2700 /* Then, search for the other last nodes of the sub expression. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2701 for (; sl_str <= bkref_str_idx; ++sl_str)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2702 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2703 int cls_node, sl_str_off;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2704 const re_node_set *nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2705 sl_str_off = sl_str - sub_top->str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2706 /* The matched string by the sub expression match with the substring
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2707 at the back reference? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2708 if (sl_str_off > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2709 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2710 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2711 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2712 /* If we are at the end of the input, we cannot match. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2713 if (bkref_str_off >= mctx->input.len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2714 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2715
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2716 err = extend_buffers (mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2717 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2718 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2719
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2720 buf = (const char *) re_string_get_buffer (&mctx->input);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2721 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2722 if (buf [bkref_str_off++] != buf[sl_str - 1])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2723 break; /* We don't need to search this sub expression
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2724 any more. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2725 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2726 if (mctx->state_log[sl_str] == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2727 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2728 /* Does this state have a ')' of the sub expression? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2729 nodes = &mctx->state_log[sl_str]->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2730 cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2731 if (cls_node == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2732 continue; /* No. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2733 if (sub_top->path == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2734 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
2735 sub_top->path = re_calloc (state_array_t,
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
2736 sl_str - sub_top->str_idx + 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2737 if (sub_top->path == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2738 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2739 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2740 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2741 in the current context? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2742 err = check_arrival (mctx, sub_top->path, sub_top->node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2743 sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2744 if (err == REG_NOMATCH)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2745 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2746 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2747 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2748 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2749 if (BE (sub_last == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2750 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2751 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2752 bkref_str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2753 if (err == REG_NOMATCH)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2754 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2755 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2756 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2757 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2758 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2759
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2760 /* Helper functions for get_subexp(). */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2761
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2762 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2763 If it can arrive, register the sub expression expressed with SUB_TOP
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2764 and SUB_LAST. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2765
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2766 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2767 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2768 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2769 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2770 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2771 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2772 int to_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2773 /* Can the subexpression arrive the back reference? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2774 err = check_arrival (mctx, &sub_last->path, sub_last->node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2775 sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2776 if (err != REG_NOERROR)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2777 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2778 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2779 sub_last->str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2780 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2781 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2782 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2783 return clean_state_log_if_needed (mctx, to_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2784 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2785
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2786 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2787 Search '(' if FL_OPEN, or search ')' otherwise.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2788 TODO: This function isn't efficient...
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2789 Because there might be more than one nodes whose types are
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2790 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2791 nodes.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2792 E.g. RE: (a){2} */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2793
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2794 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2795 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2796 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2797 int subexp_idx, int type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2798 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2799 int cls_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2800 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2801 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2802 int cls_node = nodes->elems[cls_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2803 const re_token_t *node = dfa->nodes + cls_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2804 if (node->type == type
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2805 && node->opr.idx == subexp_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2806 return cls_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2807 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2808 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2809 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2810
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2811 /* Check whether the node TOP_NODE at TOP_STR can reach the node
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2812 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2813 heavily reused.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2814 Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or another error code such as REG_ESPACE on failure. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2815
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2816 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2817 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2818 check_arrival (re_match_context_t *mctx, state_array_t *path,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2819 int top_node, int top_str, int last_node, int last_str,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2820 int type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2821 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2822 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2823 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2824 int subexp_num, backup_cur_idx, str_idx, null_cnt;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2825 re_dfastate_t *cur_state = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2826 re_node_set *cur_nodes, next_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2827 re_dfastate_t **backup_state_log;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2828 unsigned int context;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2829
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2830 subexp_num = dfa->nodes[top_node].opr.idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2831 /* Extend the buffer if we need. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2832 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2833 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2834 re_dfastate_t **new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2835 int old_alloc = path->alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2836 path->alloc += last_str + mctx->max_mb_elem_len + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2837 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2838 if (new_array == NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2839 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2840 path->alloc = old_alloc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2841 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2842 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2843 path->array = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2844 memset (new_array + old_alloc, '\0',
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2845 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2846 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2847
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2848 str_idx = path->next_idx == 0 ? top_str : path->next_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2849
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2850 /* Temporary modify MCTX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2851 backup_state_log = mctx->state_log;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2852 backup_cur_idx = mctx->input.cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2853 mctx->state_log = path->array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2854 mctx->input.cur_idx = str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2855
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2856 /* Setup initial node set. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2857 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2858 if (str_idx == top_str)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2859 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2860 err = re_node_set_init_1 (&next_nodes, top_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2861 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2862 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2863 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2864 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2865 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2866 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2867 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2868 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2869 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2870 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2871 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2872 cur_state = mctx->state_log[str_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2873 if (cur_state && cur_state->has_backref)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2874 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2875 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2876 if (BE ( err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2877 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2878 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2879 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2880 re_node_set_init_empty (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2881 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2882 if (str_idx == top_str || (cur_state && cur_state->has_backref))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2883 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2884 if (next_nodes.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2885 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2886 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2887 subexp_num, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2888 if (BE ( err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2889 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2890 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2891 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2892 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2893 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2894 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2895 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2896 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2897 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2898 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2899 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2900 mctx->state_log[str_idx] = cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2901 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2902
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2903 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2904 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2905 re_node_set_empty (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2906 if (mctx->state_log[str_idx + 1])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2907 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2908 err = re_node_set_merge (&next_nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2909 &mctx->state_log[str_idx + 1]->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2910 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2911 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2912 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2913 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2914 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2915 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2916 if (cur_state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2917 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2918 err = check_arrival_add_next_nodes (mctx, str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2919 &cur_state->non_eps_nodes, &next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2920 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2921 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2922 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2923 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2924 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2925 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2926 ++str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2927 if (next_nodes.nelem)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2928 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2929 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2930 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2931 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2932 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2933 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2934 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2935 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2936 subexp_num, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2937 if (BE ( err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2938 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2939 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2940 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2941 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2942 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2943 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2944 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2945 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2946 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2947 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2948 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2949 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2950 mctx->state_log[str_idx] = cur_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2951 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2952 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2953 re_node_set_free (&next_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2954 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2955 : &mctx->state_log[last_str]->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2956 path->next_idx = str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2957
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2958 /* Fix MCTX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2959 mctx->state_log = backup_state_log;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2960 mctx->input.cur_idx = backup_cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2961
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2962 /* Then check the current node set has the node LAST_NODE. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2963 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2964 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2965
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2966 return REG_NOMATCH;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2967 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2969 /* Helper functions for check_arrival. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2970
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2971 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2972 to NEXT_NODES.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2973 TODO: This function is similar to the functions transit_state*(),
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2974 however this function has many additional works.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2975 Can't we unify them? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2976
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2977 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2978 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2979 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2980 re_node_set *cur_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
2981 re_node_set *next_nodes)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2982 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2983 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2984 int result;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2985 int cur_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2986 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2987 re_node_set union_set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2988 re_node_set_init_empty (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2989 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2990 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2991 int naccepted = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2992 int cur_node = cur_nodes->elems[cur_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2993 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2994 re_token_type_t type = dfa->nodes[cur_node].type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2995 assert (!IS_EPSILON_NODE (type));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2996 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2997 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2998 /* If the node may accept `multi byte'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
2999 if (dfa->nodes[cur_node].accept_mb)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3000 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3001 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3002 str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3003 if (naccepted > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3004 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3005 re_dfastate_t *dest_state;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3006 int next_node = dfa->nexts[cur_node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3007 int next_idx = str_idx + naccepted;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3008 dest_state = mctx->state_log[next_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3009 re_node_set_empty (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3010 if (dest_state)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3011 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3012 err = re_node_set_merge (&union_set, &dest_state->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3013 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3014 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3015 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3016 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3017 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3018 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3019 result = re_node_set_insert (&union_set, next_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3020 if (BE (result < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3021 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3022 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3023 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3024 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3025 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3026 &union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3027 if (BE (mctx->state_log[next_idx] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3028 && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3029 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3030 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3031 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3032 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3033 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3034 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3035 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3036 if (naccepted
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3037 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3038 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3039 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3040 if (BE (result < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3041 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3042 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3043 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3044 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3045 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3046 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3047 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3048 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3049 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3050
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3051 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3052 CUR_NODES, however exclude the nodes which are:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3053 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3054 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3055 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3056
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3057 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3058 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3059 check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3060 int ex_subexp, int type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3061 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3062 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3063 int idx, outside_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3064 re_node_set new_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3065 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3066 assert (cur_nodes->nelem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3067 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3068 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3069 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3070 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3071 /* Create a new node set NEW_NODES with the nodes which are epsilon
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3072 closures of the node in CUR_NODES. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3073
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3074 for (idx = 0; idx < cur_nodes->nelem; ++idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3075 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3076 int cur_node = cur_nodes->elems[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3077 re_node_set *eclosure = dfa->eclosures + cur_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3078 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3079 if (outside_node == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3080 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3081 /* There are no problematic nodes, just merge them. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3082 err = re_node_set_merge (&new_nodes, eclosure);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3083 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3084 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3085 re_node_set_free (&new_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3086 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3087 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3088 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3089 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3090 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3091 /* There are problematic nodes, re-calculate incrementally. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3092 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3093 ex_subexp, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3094 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3095 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3096 re_node_set_free (&new_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3097 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3098 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3099 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3100 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3101 re_node_set_free (cur_nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3102 *cur_nodes = new_nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3103 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3104 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3105
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3106 /* Helper function for check_arrival_expand_ecl.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3107 Check incrementally the epsilon closure of TARGET, and if it isn't
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3108 problematic append it to DST_NODES. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3109
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3110 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3111 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3112 check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3113 int target, int ex_subexp, int type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3114 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3115 int cur_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3116 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3117 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3118 int err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3119
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3120 if (dfa->nodes[cur_node].type == type
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3121 && dfa->nodes[cur_node].opr.idx == ex_subexp)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3122 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3123 if (type == OP_CLOSE_SUBEXP)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3124 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3125 err = re_node_set_insert (dst_nodes, cur_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3126 if (BE (err == -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3127 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3128 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3129 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3130 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3131 err = re_node_set_insert (dst_nodes, cur_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3132 if (BE (err == -1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3133 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3134 if (dfa->edests[cur_node].nelem == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3135 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3136 if (dfa->edests[cur_node].nelem == 2)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3137 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3138 reg_errcode_t ret =
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3139 check_arrival_expand_ecl_sub (dfa, dst_nodes,
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3140 dfa->edests[cur_node].elems[1],
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3141 ex_subexp, type);
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3142 if (BE (ret != REG_NOERROR, 0))
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3143 return ret;
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3144 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3145 cur_node = dfa->edests[cur_node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3146 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3147 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3148 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3149
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3150
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3151 /* For all the back references in the current state, calculate the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3152 destination of the back references by the appropriate entry
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3153 in MCTX->BKREF_ENTS. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3154
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3155 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3156 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3157 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3158 int cur_str, int subexp_num, int type)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3159 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3160 re_dfa_t *const dfa = mctx->dfa;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3161 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3162 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3163 struct re_backref_cache_entry *ent;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3164
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3165 if (cache_idx_start == -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3166 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3167
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3168 restart:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3169 ent = mctx->bkref_ents + cache_idx_start;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3170 do
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3171 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3172 int to_idx, next_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3173
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3174 /* Is this entry ENT appropriate? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3175 if (!re_node_set_contains (cur_nodes, ent->node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3176 continue; /* No. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3177
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3178 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3179 /* Calculate the destination of the back reference, and append it
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3180 to MCTX->STATE_LOG. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3181 if (to_idx == cur_str)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3182 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3183 /* The back reference made an epsilon transition; we must re-check all the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3184 nodes in the current state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3185 re_node_set new_dests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3186 reg_errcode_t err2, err3;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3187 next_node = dfa->edests[ent->node].elems[0];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3188 if (re_node_set_contains (cur_nodes, next_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3189 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3190 err = re_node_set_init_1 (&new_dests, next_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3191 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3192 err3 = re_node_set_merge (cur_nodes, &new_dests);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3193 re_node_set_free (&new_dests);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3194 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3195 || err3 != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3196 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3197 err = (err != REG_NOERROR ? err
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3198 : (err2 != REG_NOERROR ? err2 : err3));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3199 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3200 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3201 /* TODO: It is still inefficient... */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3202 goto restart;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3203 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3204 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3205 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3206 re_node_set union_set;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3207 next_node = dfa->nexts[ent->node];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3208 if (mctx->state_log[to_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3209 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3210 int ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3211 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3212 next_node))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3213 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3214 err = re_node_set_init_copy (&union_set,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3215 &mctx->state_log[to_idx]->nodes);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3216 ret = re_node_set_insert (&union_set, next_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3217 if (BE (err != REG_NOERROR || ret < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3218 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3219 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3220 err = err != REG_NOERROR ? err : REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3221 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3222 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3223 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3224 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3225 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3226 err = re_node_set_init_1 (&union_set, next_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3227 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3228 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3229 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3230 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3231 re_node_set_free (&union_set);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3232 if (BE (mctx->state_log[to_idx] == NULL
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3233 && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3234 return err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3235 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3236 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3237 while (ent++->more);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3238 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3239 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3240
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3241 /* Build transition table for the state.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3242 Return 1 if succeeded, otherwise return 0. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3243
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3244 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3245 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3246 build_trtable (re_dfa_t *dfa, re_dfastate_t *state)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3247 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3248 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3249 int i, j, ch, need_word_trtable = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3250 unsigned int elem, mask;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3251 int dests_node_malloced = 0, dest_states_malloced = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3252 int ndests; /* Number of the destination states from `state'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3253 re_dfastate_t **trtable;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3254 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3255 re_node_set follows, *dests_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3256 bitset *dests_ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3257 bitset acceptable;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3258
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3259 /* We build DFA states which correspond to the destination nodes
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3260 from `state'. `dests_node[i]' represents the nodes which i-th
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3261 destination state contains, and `dests_ch[i]' represents the
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3262 characters which i-th destination state accepts. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3263 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3264 dests_node = (re_node_set *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3265 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3266 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3267 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3268 dests_node = (re_node_set *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3269 malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3270 if (BE (dests_node == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3271 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3272 dests_node_malloced = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3273 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3274 dests_ch = (bitset *) (dests_node + SBC_MAX);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3275
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3276 /* Initialize transition table. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3277 state->word_trtable = state->trtable = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3278
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3279 /* At first, group all nodes belonging to `state' into several
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3280 destinations. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3281 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3282 if (BE (ndests <= 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3283 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3284 if (dests_node_malloced)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3285 free (dests_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3286 /* Return 0 in case of an error, 1 otherwise. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3287 if (ndests == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3288 {
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3289 state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3290 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3291 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3292 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3293 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3294
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3295 err = re_node_set_alloc (&follows, ndests + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3296 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3297 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3298
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3299 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3300 + ndests * 3 * sizeof (re_dfastate_t *)))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3301 dest_states = (re_dfastate_t **)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3302 alloca (ndests * 3 * sizeof (re_dfastate_t *));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3303 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3304 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3305 dest_states = (re_dfastate_t **)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3306 malloc (ndests * 3 * sizeof (re_dfastate_t *));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3307 if (BE (dest_states == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3308 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3309 out_free:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3310 if (dest_states_malloced)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3311 free (dest_states);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3312 re_node_set_free (&follows);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3313 for (i = 0; i < ndests; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3314 re_node_set_free (dests_node + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3315 if (dests_node_malloced)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3316 free (dests_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3317 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3318 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3319 dest_states_malloced = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3320 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3321 dest_states_word = dest_states + ndests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3322 dest_states_nl = dest_states_word + ndests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3323 bitset_empty (acceptable);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3324
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3325 /* Then build the states for all destinations. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3326 for (i = 0; i < ndests; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3327 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3328 int next_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3329 re_node_set_empty (&follows);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3330 /* Merge the follows of this destination state. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3331 for (j = 0; j < dests_node[i].nelem; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3332 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3333 next_node = dfa->nexts[dests_node[i].elems[j]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3334 if (next_node != -1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3335 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3336 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3337 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3338 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3339 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3340 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3341 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3342 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3343 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3344 /* If the new state has context constraint,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3345 build appropriate states for these contexts. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3346 if (dest_states[i]->has_constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3347 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3348 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3349 CONTEXT_WORD);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3350 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3351 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3352
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3353 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3354 need_word_trtable = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3355
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3356 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3357 CONTEXT_NEWLINE);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3358 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3359 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3360 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3361 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3362 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3363 dest_states_word[i] = dest_states[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3364 dest_states_nl[i] = dest_states[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3365 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3366 bitset_merge (acceptable, dests_ch[i]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3367 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3368
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3369 if (!BE (need_word_trtable, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3370 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3371 /* We don't care about whether the following character is a word
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3372 character, or we are in a single-byte character set so we can
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3373 discern by looking at the character code: allocate a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3374 256-entry transition table. */
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3375 trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3376 if (BE (trtable == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3377 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3378
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3379 /* For all characters ch...: */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3380 for (i = 0; i < BITSET_UINTS; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3381 for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3382 elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3383 mask <<= 1, elem >>= 1, ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3384 if (BE (elem & 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3385 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3386 /* There must be exactly one destination which accepts
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3387 character ch. See group_nodes_into_DFAstates. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3388 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3389 ;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3390
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3391 /* j-th destination accepts the word character ch. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3392 if (dfa->word_char[i] & mask)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3393 trtable[ch] = dest_states_word[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3394 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3395 trtable[ch] = dest_states[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3396 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3397 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3398 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3399 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3400 /* We care about whether the following character is a word
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3401 character, and we are in a multi-byte character set: discern
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3402 by looking at the character code: build two 256-entry
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3403 transition tables, one starting at trtable[0] and one
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3404 starting at trtable[SBC_MAX]. */
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
3405 trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3406 if (BE (trtable == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3407 goto out_free;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3408
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3409 /* For all characters ch...: */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3410 for (i = 0; i < BITSET_UINTS; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3411 for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3412 elem;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3413 mask <<= 1, elem >>= 1, ++ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3414 if (BE (elem & 1, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3415 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3416 /* There must be exactly one destination which accepts
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3417 character ch. See group_nodes_into_DFAstates. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3418 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3419 ;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3420
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3421 /* j-th destination accepts the word character ch. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3422 trtable[ch] = dest_states[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3423 trtable[ch + SBC_MAX] = dest_states_word[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3424 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3425 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3426
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3427 /* new line */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3428 if (bitset_contain (acceptable, NEWLINE_CHAR))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3429 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3430 /* The current state accepts newline character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3431 for (j = 0; j < ndests; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3432 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3433 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3434 /* j-th destination accepts newline character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3435 trtable[NEWLINE_CHAR] = dest_states_nl[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3436 if (need_word_trtable)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3437 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3438 /* There must be only one destination which accepts
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3439 newline. See group_nodes_into_DFAstates. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3440 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3441 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3442 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3443
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3444 if (dest_states_malloced)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3445 free (dest_states);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3446
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3447 re_node_set_free (&follows);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3448 for (i = 0; i < ndests; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3449 re_node_set_free (dests_node + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3450
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3451 if (dests_node_malloced)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3452 free (dests_node);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3453
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3454 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3455 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3456
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3457 /* Group all nodes belonging to STATE into several destinations.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3458 Then for all destinations, set the nodes belonging to the destination
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3459 to DESTS_NODE[i] and set the characters accepted by the destination
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3460 to DESTS_CH[i]. This function returns the number of destinations. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3461
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3462 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3463 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3464 group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3465 re_node_set *dests_node, bitset *dests_ch)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3466 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3467 reg_errcode_t err;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3468 int result;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3469 int i, j, k;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3470 int ndests; /* Number of the destinations from `state'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3471 bitset accepts; /* Characters a node can accept. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3472 const re_node_set *cur_nodes = &state->nodes;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3473 bitset_empty (accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3474 ndests = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3475
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3476 /* For all the nodes belonging to `state', */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3477 for (i = 0; i < cur_nodes->nelem; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3478 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3479 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3480 re_token_type_t type = node->type;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3481 unsigned int constraint = node->constraint;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3482
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3483 /* Enumerate all single byte character this node can accept. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3484 if (type == CHARACTER)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3485 bitset_set (accepts, node->opr.c);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3486 else if (type == SIMPLE_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3487 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3488 bitset_merge (accepts, node->opr.sbcset);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3489 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3490 else if (type == OP_PERIOD)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3491 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3492 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3493 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3494 bitset_merge (accepts, dfa->sb_char);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3495 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3496 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3497 bitset_set_all (accepts);
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3498 if (!(dfa->syntax & REG_DOT_NEWLINE))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3499 bitset_clear (accepts, '\n');
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3500 if (dfa->syntax & REG_DOT_NOT_NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3501 bitset_clear (accepts, '\0');
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3502 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3503 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3504 else if (type == OP_UTF8_PERIOD)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3505 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3506 memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3507 if (!(dfa->syntax & REG_DOT_NEWLINE))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3508 bitset_clear (accepts, '\n');
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3509 if (dfa->syntax & REG_DOT_NOT_NULL)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3510 bitset_clear (accepts, '\0');
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3511 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3512 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3513 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3514 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3515
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3516 /* Check the `accepts' and sift the characters which are not
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3517 match it the context. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3518 if (constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3519 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3520 if (constraint & NEXT_NEWLINE_CONSTRAINT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3521 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3522 int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3523 bitset_empty (accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3524 if (accepts_newline)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3525 bitset_set (accepts, NEWLINE_CHAR);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3526 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3527 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3528 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3529 if (constraint & NEXT_ENDBUF_CONSTRAINT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3530 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3531 bitset_empty (accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3532 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3533 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3534
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3535 if (constraint & NEXT_WORD_CONSTRAINT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3536 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3537 unsigned int any_set = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3538 if (type == CHARACTER && !node->word_char)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3539 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3540 bitset_empty (accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3541 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3542 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3543 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3544 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3545 for (j = 0; j < BITSET_UINTS; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3546 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3547 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3548 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3549 for (j = 0; j < BITSET_UINTS; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3550 any_set |= (accepts[j] &= dfa->word_char[j]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3551 if (!any_set)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3552 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3553 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3554 if (constraint & NEXT_NOTWORD_CONSTRAINT)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3555 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3556 unsigned int any_set = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3557 if (type == CHARACTER && node->word_char)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3558 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3559 bitset_empty (accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3560 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3561 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3562 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3563 if (dfa->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3564 for (j = 0; j < BITSET_UINTS; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3565 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3566 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3567 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3568 for (j = 0; j < BITSET_UINTS; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3569 any_set |= (accepts[j] &= ~dfa->word_char[j]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3570 if (!any_set)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3571 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3572 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3573 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3574
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3575 /* Then divide `accepts' into DFA states, or create a new
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3576 state. Above, we make sure that accepts is not empty. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3577 for (j = 0; j < ndests; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3578 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3579 bitset intersec; /* Intersection sets, see below. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3580 bitset remains;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3581 /* Flags, see below. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3582 int has_intersec, not_subset, not_consumed;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3583
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3584 /* Optimization, skip if this state doesn't accept the character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3585 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3586 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3587
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3588 /* Enumerate the intersection set of this state and `accepts'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3589 has_intersec = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3590 for (k = 0; k < BITSET_UINTS; ++k)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3591 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3592 /* And skip if the intersection set is empty. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3593 if (!has_intersec)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3594 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3595
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3596 /* Then check if this state is a subset of `accepts'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3597 not_subset = not_consumed = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3598 for (k = 0; k < BITSET_UINTS; ++k)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3599 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3600 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3601 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3602 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3603
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3604 /* If this state isn't a subset of `accepts', create a
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3605 new group state, which has the `remains'. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3606 if (not_subset)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3607 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3608 bitset_copy (dests_ch[ndests], remains);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3609 bitset_copy (dests_ch[j], intersec);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3610 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3611 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3612 goto error_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3613 ++ndests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3614 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3615
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3616 /* Put the position in the current group. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3617 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3618 if (BE (result < 0, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3619 goto error_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3620
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3621 /* If all characters are consumed, go to next node. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3622 if (!not_consumed)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3623 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3624 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3625 /* Some characters remain, create a new group. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3626 if (j == ndests)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3627 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3628 bitset_copy (dests_ch[ndests], accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3629 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3630 if (BE (err != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3631 goto error_return;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3632 ++ndests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3633 bitset_empty (accepts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3634 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3635 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3636 return ndests;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3637 error_return:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3638 for (j = 0; j < ndests; ++j)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3639 re_node_set_free (dests_node + j);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3640 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3641 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3642
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3643 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3644 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3645 Return the number of the bytes the node accepts.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3646 STR_IDX is the current index of the input string.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3647
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3648 This function handles the nodes which can accept one character, or
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3649 one collating element like '.', '[a-z]', as opposed to the other nodes,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3650 which can only accept one byte. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3651
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3652 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3653 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3654 check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3655 const re_string_t *input, int str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3656 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3657 const re_token_t *node = dfa->nodes + node_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3658 int char_len, elem_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3659 int i;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3660
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3661 if (BE (node->type == OP_UTF8_PERIOD, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3662 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3663 unsigned char c = re_string_byte_at (input, str_idx), d;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3664 if (BE (c < 0xc2, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3665 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3666
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3667 if (str_idx + 2 > input->len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3668 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3669
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3670 d = re_string_byte_at (input, str_idx + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3671 if (c < 0xe0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3672 return (d < 0x80 || d > 0xbf) ? 0 : 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3673 else if (c < 0xf0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3674 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3675 char_len = 3;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3676 if (c == 0xe0 && d < 0xa0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3677 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3678 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3679 else if (c < 0xf8)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3680 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3681 char_len = 4;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3682 if (c == 0xf0 && d < 0x90)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3683 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3684 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3685 else if (c < 0xfc)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3686 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3687 char_len = 5;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3688 if (c == 0xf8 && d < 0x88)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3689 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3690 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3691 else if (c < 0xfe)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3692 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3693 char_len = 6;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3694 if (c == 0xfc && d < 0x84)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3695 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3696 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3697 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3698 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3699
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3700 if (str_idx + char_len > input->len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3701 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3702
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3703 for (i = 1; i < char_len; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3704 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3705 d = re_string_byte_at (input, str_idx + i);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3706 if (d < 0x80 || d > 0xbf)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3707 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3708 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3709 return char_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3710 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3711
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3712 char_len = re_string_char_size_at (input, str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3713 if (node->type == OP_PERIOD)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3714 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3715 if (char_len <= 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3716 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3717 /* FIXME: I don't think this if is needed, as both '\n'
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3718 and '\0' are char_len == 1. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3719 /* '.' accepts any one character except the following two cases. */
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3720 if ((!(dfa->syntax & REG_DOT_NEWLINE) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3721 re_string_byte_at (input, str_idx) == '\n') ||
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3722 ((dfa->syntax & REG_DOT_NOT_NULL) &&
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3723 re_string_byte_at (input, str_idx) == '\0'))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3724 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3725 return char_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3726 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3727
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3728 elem_len = re_string_elem_size_at (input, str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3729 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3730 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3731
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3732 if (node->type == COMPLEX_BRACKET)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3733 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3734 const re_charset_t *cset = node->opr.mbcset;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3735 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3736 const unsigned char *pin
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3737 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3738 int j;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3739 uint32_t nrules;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3740 # endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3741 int match_len = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3742 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3743 ? re_string_wchar_at (input, str_idx) : 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3744
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3745 /* match with multibyte character? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3746 for (i = 0; i < cset->nmbchars; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3747 if (wc == cset->mbchars[i])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3748 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3749 match_len = char_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3750 goto check_node_accept_bytes_match;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3751 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3752 /* match with character_class? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3753 for (i = 0; i < cset->nchar_classes; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3754 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3755 wctype_t wt = cset->char_classes[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3756 if (__iswctype (wc, wt))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3757 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3758 match_len = char_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3759 goto check_node_accept_bytes_match;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3760 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3761 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3762
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3763 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3764 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3765 if (nrules != 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3766 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3767 unsigned int in_collseq = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3768 const int32_t *table, *indirect;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3769 const unsigned char *weights, *extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3770 const char *collseqwc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3771 int32_t idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3772 /* This #include defines a local function! */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3773 # include <locale/weight.h>
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3774
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3775 /* match with collating_symbol? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3776 if (cset->ncoll_syms)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3777 extra = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3778 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3779 for (i = 0; i < cset->ncoll_syms; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3780 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3781 const unsigned char *coll_sym = extra + cset->coll_syms[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3782 /* Compare the length of input collating element and
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3783 the length of current collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3784 if (*coll_sym != elem_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3785 continue;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3786 /* Compare each bytes. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3787 for (j = 0; j < *coll_sym; j++)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3788 if (pin[j] != coll_sym[1 + j])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3789 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3790 if (j == *coll_sym)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3791 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3792 /* Match if every bytes is equal. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3793 match_len = j;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3794 goto check_node_accept_bytes_match;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3795 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3796 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3797
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3798 if (cset->nranges)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3799 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3800 if (elem_len <= char_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3801 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3802 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3803 in_collseq = __collseq_table_lookup (collseqwc, wc);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3804 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3805 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3806 in_collseq = find_collation_sequence_value (pin, elem_len);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3807 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3808 /* match with range expression? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3809 for (i = 0; i < cset->nranges; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3810 if (cset->range_starts[i] <= in_collseq
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3811 && in_collseq <= cset->range_ends[i])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3812 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3813 match_len = elem_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3814 goto check_node_accept_bytes_match;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3815 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3816
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3817 /* match with equivalence_class? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3818 if (cset->nequiv_classes)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3819 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3820 const unsigned char *cp = pin;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3821 table = (const int32_t *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3822 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3823 weights = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3824 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3825 extra = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3826 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3827 indirect = (const int32_t *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3828 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3829 idx = findidx (&cp);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3830 if (idx > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3831 for (i = 0; i < cset->nequiv_classes; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3832 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3833 int32_t equiv_class_idx = cset->equiv_classes[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3834 size_t weight_len = weights[idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3835 if (weight_len == weights[equiv_class_idx])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3836 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3837 int cnt = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3838 while (cnt <= weight_len
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3839 && (weights[equiv_class_idx + 1 + cnt]
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3840 == weights[idx + 1 + cnt]))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3841 ++cnt;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3842 if (cnt > weight_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3843 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3844 match_len = elem_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3845 goto check_node_accept_bytes_match;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3846 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3847 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3848 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3849 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3850 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3851 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3852 # endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3853 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3854 /* match with range expression? */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3855 #if __GNUC__ >= 2
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3856 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3857 #else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3858 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3859 cmp_buf[2] = wc;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3860 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3861 for (i = 0; i < cset->nranges; ++i)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3862 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3863 cmp_buf[0] = cset->range_starts[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3864 cmp_buf[4] = cset->range_ends[i];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3865 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3866 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3867 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3868 match_len = char_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3869 goto check_node_accept_bytes_match;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3870 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3871 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3872 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3873 check_node_accept_bytes_match:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3874 if (!cset->non_match)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3875 return match_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3876 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3877 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3878 if (match_len > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3879 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3880 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3881 return (elem_len > char_len) ? elem_len : char_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3882 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3883 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3884 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3885 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3886
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3887 # ifdef _LIBC
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3888 static unsigned int
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3889 find_collation_sequence_value (mbs, mbs_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3890 const unsigned char *mbs;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3891 size_t mbs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3892 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3893 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3894 if (nrules == 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3895 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3896 if (mbs_len == 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3897 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3898 /* No valid character. Match it as a single byte character. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3899 const unsigned char *collseq = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3900 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3901 return collseq[mbs[0]];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3902 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3903 return UINT_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3904 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3905 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3906 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3907 int32_t idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3908 const unsigned char *extra = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3909 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3910 int32_t extrasize = (const unsigned char *)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3911 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3912
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3913 for (idx = 0; idx < extrasize;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3914 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3915 int mbs_cnt, found = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3916 int32_t elem_mbs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3917 /* Skip the name of collating element name. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3918 idx = idx + extra[idx] + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3919 elem_mbs_len = extra[idx++];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3920 if (mbs_len == elem_mbs_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3921 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3922 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3923 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3924 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3925 if (mbs_cnt == elem_mbs_len)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3926 /* Found the entry. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3927 found = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3928 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3929 /* Skip the byte sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3930 idx += elem_mbs_len;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3931 /* Adjust for the alignment. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3932 idx = (idx + 3) & ~3;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3933 /* Skip the collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3934 idx += sizeof (uint32_t);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3935 /* Skip the wide char sequence of the collating element. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3936 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3937 /* If we found the entry, return the sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3938 if (found)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3939 return *(uint32_t *) (extra + idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3940 /* Skip the collation sequence value. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3941 idx += sizeof (uint32_t);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3942 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3943 return UINT_MAX;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3944 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3945 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3946 # endif /* _LIBC */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3947 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3948
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3949 /* Check whether the node accepts the byte which is IDX-th
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3950 byte of the INPUT. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3951
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3952 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3953 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3954 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
3955 int idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3956 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3957 unsigned char ch;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3958 ch = re_string_byte_at (&mctx->input, idx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3959 switch (node->type)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3960 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3961 case CHARACTER:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3962 if (node->opr.c != ch)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3963 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3964 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3965
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3966 case SIMPLE_BRACKET:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3967 if (!bitset_contain (node->opr.sbcset, ch))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3968 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3969 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3970
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3971 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3972 case OP_UTF8_PERIOD:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3973 if (ch >= 0x80)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3974 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3975 /* FALLTHROUGH */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3976 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3977 case OP_PERIOD:
6101
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3978 if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE))
c9bcd4ba6b3f * config/srclist.txt: Add glibc bug 1237.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6084
diff changeset
3979 || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL)))
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3980 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3981 break;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3982
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3983 default:
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3984 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3985 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3986
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3987 if (node->constraint)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3988 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3989 /* The node has constraints. Check whether the current context
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3990 satisfies the constraints. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3991 unsigned int context = re_string_context_at (&mctx->input, idx,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3992 mctx->eflags);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3993 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3994 return 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3995 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3996
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3997 return 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3998 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
3999
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4000 /* Extend the buffers, if the buffers have run out. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4001
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4002 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4003 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4004 extend_buffers (re_match_context_t *mctx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4005 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4006 reg_errcode_t ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4007 re_string_t *pstr = &mctx->input;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4008
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4009 /* Double the lengthes of the buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4010 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4011 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4012 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4013
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4014 if (mctx->state_log != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4015 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4016 /* And double the length of state_log. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4017 /* XXX We have no indication of the size of this buffer. If this
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4018 allocation fail we have no indication that the state_log array
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4019 does not have the right size. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4020 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4021 pstr->bufs_len + 1);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4022 if (BE (new_array == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4023 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4024 mctx->state_log = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4025 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4026
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4027 /* Then reconstruct the buffers. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4028 if (pstr->icase)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4029 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4030 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4031 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4032 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4033 ret = build_wcs_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4034 if (BE (ret != REG_NOERROR, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4035 return ret;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4036 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4037 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4038 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4039 build_upper_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4040 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4041 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4042 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4043 #ifdef RE_ENABLE_I18N
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4044 if (pstr->mb_cur_max > 1)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4045 build_wcs_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4046 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4047 #endif /* RE_ENABLE_I18N */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4048 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4049 if (pstr->trans != NULL)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4050 re_string_translate_buffer (pstr);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4051 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4052 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4053 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4054 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4055
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4056
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4057 /* Functions for matching context. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4058
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4059 /* Initialize MCTX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4060
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4061 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4062 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4063 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4064 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4065 mctx->eflags = eflags;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4066 mctx->match_last = -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4067 if (n > 0)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4068 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4069 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4070 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4071 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4072 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4073 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4074 /* Already zero-ed by the caller.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4075 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4076 mctx->bkref_ents = NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4077 mctx->nbkref_ents = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4078 mctx->nsub_tops = 0; */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4079 mctx->abkref_ents = n;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4080 mctx->max_mb_elem_len = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4081 mctx->asub_tops = n;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4082 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4083 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4084
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4085 /* Clean the entries which depend on the current input in MCTX.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4086 This function must be invoked when the matcher changes the start index
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4087 of the input, or changes the input string. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4088
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4089 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4090 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4091 match_ctx_clean (re_match_context_t *mctx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4092 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4093 int st_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4094 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4095 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4096 int sl_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4097 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4098 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4099 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4100 re_sub_match_last_t *last = top->lasts[sl_idx];
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4101 re_free (last->path.array);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4102 re_free (last);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4103 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4104 re_free (top->lasts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4105 if (top->path)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4106 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4107 re_free (top->path->array);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4108 re_free (top->path);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4109 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4110 free (top);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4111 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4112
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4113 mctx->nsub_tops = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4114 mctx->nbkref_ents = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4115 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4116
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4117 /* Free all the memory associated with MCTX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4118
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4119 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4120 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4121 match_ctx_free (re_match_context_t *mctx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4122 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4123 /* First, free all the memory associated with MCTX->SUB_TOPS. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4124 match_ctx_clean (mctx);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4125 re_free (mctx->sub_tops);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4126 re_free (mctx->bkref_ents);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4127 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4128
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4129 /* Add a new backreference entry to MCTX.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4130 Note that we assume that caller never call this function with duplicate
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4131 entry, and call with STR_IDX which isn't smaller than any existing entry.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4132 */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4133
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4134 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4135 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4136 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4137 int from, int to)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4138 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4139 if (mctx->nbkref_ents >= mctx->abkref_ents)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4140 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4141 struct re_backref_cache_entry* new_entry;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4142 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4143 mctx->abkref_ents * 2);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4144 if (BE (new_entry == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4145 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4146 re_free (mctx->bkref_ents);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4147 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4148 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4149 mctx->bkref_ents = new_entry;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4150 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4151 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4152 mctx->abkref_ents *= 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4153 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4154 if (mctx->nbkref_ents > 0
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4155 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4156 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4157
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4158 mctx->bkref_ents[mctx->nbkref_ents].node = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4159 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4160 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4161 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4162
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4163 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4164 If bit N is clear, means that this entry won't epsilon-transition to
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4165 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4166 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4167 such node.
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4168
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4169 A backreference does not epsilon-transition unless it is empty, so set
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4170 to all zeros if FROM != TO. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4171 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
6171
5862ee08bfc1 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
Paul Eggert <eggert@cs.ucla.edu>
parents: 6125
diff changeset
4172 = (from == to ? -1 : 0);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4173
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4174 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4175 if (mctx->max_mb_elem_len < to - from)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4176 mctx->max_mb_elem_len = to - from;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4177 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4178 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4179
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4180 /* Search for the first entry which has the same str_idx, or -1 if none is
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4181 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4182
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4183 static int
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4184 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4185 search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4186 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4187 int left, right, mid, last;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4188 last = right = mctx->nbkref_ents;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4189 for (left = 0; left < right;)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4190 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4191 mid = (left + right) / 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4192 if (mctx->bkref_ents[mid].str_idx < str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4193 left = mid + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4194 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4195 right = mid;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4196 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4197 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4198 return left;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4199 else
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4200 return -1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4201 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4202
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4203 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4204 at STR_IDX. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4205
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4206 static reg_errcode_t
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4207 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4208 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4209 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4210 #ifdef DEBUG
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4211 assert (mctx->sub_tops != NULL);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4212 assert (mctx->asub_tops > 0);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4213 #endif
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4214 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4215 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4216 int new_asub_tops = mctx->asub_tops * 2;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4217 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4218 re_sub_match_top_t *,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4219 new_asub_tops);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4220 if (BE (new_array == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4221 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4222 mctx->sub_tops = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4223 mctx->asub_tops = new_asub_tops;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4224 }
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
4225 mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4226 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4227 return REG_ESPACE;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4228 mctx->sub_tops[mctx->nsub_tops]->node = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4229 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4230 return REG_NOERROR;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4231 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4232
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4233 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4234 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4235
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4236 static re_sub_match_last_t *
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4237 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4238 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4239 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4240 re_sub_match_last_t *new_entry;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4241 if (BE (subtop->nlasts == subtop->alasts, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4242 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4243 int new_alasts = 2 * subtop->alasts + 1;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4244 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4245 re_sub_match_last_t *,
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4246 new_alasts);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4247 if (BE (new_array == NULL, 0))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4248 return NULL;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4249 subtop->lasts = new_array;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4250 subtop->alasts = new_alasts;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4251 }
6119
c3bf2ea44695 Make regex safe for g++. This fixes one real bug (an "err"
Paul Eggert <eggert@cs.ucla.edu>
parents: 6101
diff changeset
4252 new_entry = re_calloc (re_sub_match_last_t, 1);
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4253 if (BE (new_entry != NULL, 1))
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4254 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4255 subtop->lasts[subtop->nlasts] = new_entry;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4256 new_entry->node = node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4257 new_entry->str_idx = str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4258 ++subtop->nlasts;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4259 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4260 return new_entry;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4261 }
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4262
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4263 static void
6076
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4264 internal_function
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4265 sift_ctx_init (re_sift_context_t *sctx,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4266 re_dfastate_t **sifted_sts,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4267 re_dfastate_t **limited_sts,
e2dd51f6e259 * config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents: 6069
diff changeset
4268 int last_node, int last_str_idx)
5968
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4269 {
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4270 sctx->sifted_states = sifted_sts;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4271 sctx->limited_states = limited_sts;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4272 sctx->last_node = last_node;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4273 sctx->last_str_idx = last_str_idx;
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4274 re_node_set_init_empty (&sctx->limits);
541fed6ae301 * modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
diff changeset
4275 }