Mercurial > hg > octave-lojdl > gnulib-hg
annotate lib/regex-quote.c @ 17426:90f3d53e01f5
sig2str: port to C++
* lib/sig2str.h (sig2str, str2sig): Declare as extern "C".
Reported by Daniel J Sebald in
<http://lists.gnu.org/archive/html/bug-gnulib/2013-06/msg00000.html>.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Sun, 02 Jun 2013 11:52:41 -0700 |
parents | e542fd46ad6f |
children |
rev | line source |
---|---|
13728 | 1 /* Construct a regular expression from a literal string. |
17249
e542fd46ad6f
maint: update all copyright year number ranges
Eric Blake <eblake@redhat.com>
parents:
16201
diff
changeset
|
2 Copyright (C) 1995, 2010-2013 Free Software Foundation, Inc. |
13728 | 3 Written by Bruno Haible <haible@clisp.cons.org>, 2010. |
4 | |
5 This program is free software: you can redistribute it and/or modify | |
6 it under the terms of the GNU General Public License as published by | |
7 the Free Software Foundation; either version 3 of the License, or | |
8 (at your option) any later version. | |
9 | |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU General Public License | |
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
17 | |
18 #include <config.h> | |
19 | |
20 /* Specification. */ | |
21 #include "regex-quote.h" | |
22 | |
23 #include <string.h> | |
24 | |
25 #include "mbuiter.h" | |
26 #include "xalloc.h" | |
27 | |
/* The set of characters that carry a special meaning in a POSIX basic
   regular expression (BRE).  A literal occurrence of any of them gets a
   backslash in front of it when a string is quoted.  */
static const char bre_special[] = "$^.*[]\\";
30 | |
/* The set of characters that carry a special meaning in a POSIX extended
   regular expression (ERE): the BRE set followed by + ? { } ( ) |.  */
static const char ere_special[] = "$^.*[]\\+?{}()|";
13728 | 33 |
14410 | 34 struct regex_quote_spec |
35 regex_quote_spec_posix (int cflags, bool anchored) | |
36 { | |
37 struct regex_quote_spec result; | |
38 | |
39 strcpy (result.special, cflags != 0 ? ere_special : bre_special); | |
40 result.multibyte = true; | |
41 result.anchored = anchored; | |
42 | |
43 return result; | |
44 } | |
45 | |
/* Bits of a GNU regex syntax word.  They mirror the definitions in GNU
   <regex.h>; the header itself is deliberately not included, so that this
   module does not have to depend on the gnulib module 'regex'.  */
#define RE_BK_PLUS_QM    0x00000002
#define RE_INTERVALS     0x00000200
#define RE_LIMITED_OPS   0x00000400
#define RE_NEWLINE_ALT   0x00000800
#define RE_NO_BK_BRACES  0x00001000
#define RE_NO_BK_PARENS  0x00002000
#define RE_NO_BK_VBAR    0x00008000
55 | |
56 struct regex_quote_spec | |
57 regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored) | |
13728 | 58 { |
14410 | 59 struct regex_quote_spec result; |
60 char *p; | |
61 | |
62 p = result.special; | |
63 memcpy (p, bre_special, sizeof (bre_special) - 1); | |
64 p += sizeof (bre_special) - 1; | |
65 if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0) | |
66 { | |
67 *p++ = '+'; | |
68 *p++ = '?'; | |
69 } | |
70 if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0) | |
71 { | |
72 *p++ = '{'; | |
73 *p++ = '}'; | |
74 } | |
75 if ((syntax & RE_NO_BK_PARENS) != 0) | |
76 { | |
77 *p++ = '('; | |
78 *p++ = ')'; | |
79 } | |
80 if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0) | |
81 *p++ = '|'; | |
82 if ((syntax & RE_NEWLINE_ALT) != 0) | |
83 *p++ = '\n'; | |
84 *p = '\0'; | |
85 | |
86 result.multibyte = true; | |
87 result.anchored = anchored; | |
88 | |
89 return result; | |
90 } | |
91 | |
/* The set of characters that carry a special meaning in a PCRE pattern.
   A literal occurrence of any of them gets a backslash in front of it when
   a string is quoted.  */
static const char pcre_special[] = "$^.*[]\\+?{}()|";
94 | |
/* Bits of a PCRE options word.  They mirror the definitions in <pcre.h>,
   which is not included here because it is not a standard header.  */
#define PCRE_ANCHORED  0x00000010
#define PCRE_EXTENDED  0x00000008
99 | |
100 struct regex_quote_spec | |
101 regex_quote_spec_pcre (int options, bool anchored) | |
102 { | |
103 struct regex_quote_spec result; | |
104 char *p; | |
105 | |
106 p = result.special; | |
107 memcpy (p, bre_special, sizeof (pcre_special) - 1); | |
108 p += sizeof (pcre_special) - 1; | |
109 if (options & PCRE_EXTENDED) | |
110 { | |
111 *p++ = ' '; | |
112 *p++ = '\t'; | |
113 *p++ = '\n'; | |
114 *p++ = '\v'; | |
115 *p++ = '\f'; | |
116 *p++ = '\r'; | |
117 *p++ = '#'; | |
118 } | |
119 *p = '\0'; | |
120 | |
121 /* PCRE regular expressions consist of UTF-8 characters of options contains | |
122 PCRE_UTF8 and of single bytes otherwise. */ | |
123 result.multibyte = false; | |
124 /* If options contains PCRE_ANCHORED, the anchoring is implicit. */ | |
125 result.anchored = (options & PCRE_ANCHORED ? 0 : anchored); | |
126 | |
127 return result; | |
128 } | |
129 | |
130 size_t | |
131 regex_quote_length (const char *string, const struct regex_quote_spec *spec) | |
132 { | |
133 const char *special = spec->special; | |
13728 | 134 size_t length; |
135 | |
136 length = 0; | |
14410 | 137 if (spec->anchored) |
138 length += 2; /* for '^' at the beginning and '$' at the end */ | |
139 if (spec->multibyte) | |
13728 | 140 { |
14410 | 141 mbui_iterator_t iter; |
142 | |
143 for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter)) | |
144 { | |
145 /* We know that special contains only ASCII characters. */ | |
146 if (mb_len (mbui_cur (iter)) == 1 | |
147 && strchr (special, * mbui_cur_ptr (iter))) | |
148 length += 1; | |
149 length += mb_len (mbui_cur (iter)); | |
150 } | |
13728 | 151 } |
14410 | 152 else |
153 { | |
154 const char *iter; | |
155 | |
156 for (iter = string; *iter != '\0'; iter++) | |
157 { | |
158 if (strchr (special, *iter)) | |
159 length += 1; | |
160 length += 1; | |
161 } | |
162 } | |
163 | |
13728 | 164 return length; |
165 } | |
166 | |
167 char * | |
14410 | 168 regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec) |
13728 | 169 { |
14410 | 170 const char *special = spec->special; |
171 | |
172 if (spec->anchored) | |
173 *p++ = '^'; | |
174 if (spec->multibyte) | |
175 { | |
176 mbui_iterator_t iter; | |
13728 | 177 |
14410 | 178 for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter)) |
179 { | |
180 /* We know that special contains only ASCII characters. */ | |
181 if (mb_len (mbui_cur (iter)) == 1 | |
182 && strchr (special, * mbui_cur_ptr (iter))) | |
183 *p++ = '\\'; | |
184 memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter))); | |
185 p += mb_len (mbui_cur (iter)); | |
186 } | |
187 } | |
188 else | |
13728 | 189 { |
14410 | 190 const char *iter; |
191 | |
192 for (iter = string; *iter != '\0'; iter++) | |
193 { | |
194 if (strchr (special, *iter)) | |
195 *p++ = '\\'; | |
196 *p++ = *iter++; | |
197 } | |
13728 | 198 } |
14410 | 199 if (spec->anchored) |
200 *p++ = '$'; | |
201 | |
13728 | 202 return p; |
203 } | |
204 | |
205 char * | |
14410 | 206 regex_quote (const char *string, const struct regex_quote_spec *spec) |
13728 | 207 { |
14410 | 208 size_t length = regex_quote_length (string, spec); |
13728 | 209 char *result = XNMALLOC (length + 1, char); |
210 char *p; | |
211 | |
212 p = result; | |
14410 | 213 p = regex_quote_copy (p, string, spec); |
13728 | 214 *p = '\0'; |
215 return result; | |
216 } |