Mercurial > hg > octave-kai > gnulib-hg
annotate lib/striconveh.c @ 11653:ecb384fccb5b
fpurge: migrate into <stdio.h>
* lib/fpurge.h: Delete...
* lib/stdio.in.h (fpurge): ...and declare here, instead.
* lib/fpurge.c (fpurge): Change declaring header.
* modules/fpurge (Files): Drop deleted file.
(Depends-on): Add stdio.
(configure.ac): Set witness.
* modules/stdio (Makefile.am): Support fpurge macros.
* m4/stdio_h.m4 (gl_STDIO_H_DEFAULTS): Likewise.
* m4/fpurge.m4 (gl_FUNC_FPURGE): Set appropriate variables.
* lib/fflush.c: Update client.
* tests/test-fpurge.c: Likewise.
* NEWS: Mention the change.
Signed-off-by: Eric Blake <ebb9@byu.net>
author | Eric Blake <ebb9@byu.net> |
---|---|
date | Wed, 24 Jun 2009 06:49:00 -0600 |
parents | 5991da96e696 |
children | ba1f5a03459a |
rev | line source |
---|---|
7882 | 1 /* Character set conversion with error handling. |
9835
69955b112e2e
Avoid failure when attempting to return empty iconv results on some platforms.
Bruno Haible <bruno@clisp.org>
parents:
9309
diff
changeset
|
2 Copyright (C) 2001-2008 Free Software Foundation, Inc. |
7882 | 3 Written by Bruno Haible and Simon Josefsson. |
4 | |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8605
diff
changeset
|
5 This program is free software: you can redistribute it and/or modify |
7882 | 6 it under the terms of the GNU General Public License as published by |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8605
diff
changeset
|
7 the Free Software Foundation; either version 3 of the License, or |
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8605
diff
changeset
|
8 (at your option) any later version. |
7882 | 9 |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU General Public License | |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8605
diff
changeset
|
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
7882 | 17 |
18 #include <config.h> | |
19 | |
20 /* Specification. */ | |
21 #include "striconveh.h" | |
22 | |
23 #include <errno.h> | |
24 #include <stdbool.h> | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
27 | |
28 #if HAVE_ICONV | |
29 # include <iconv.h> | |
30 # include "unistr.h" | |
31 #endif | |
32 | |
33 #include "c-strcase.h" | |
7936
b93a4036490a
Optimize the recognition of "UTF-8" strings.
Bruno Haible <bruno@clisp.org>
parents:
7927
diff
changeset
|
34 #include "c-strcaseeq.h" |
7882 | 35 |
36 #ifndef SIZE_MAX | |
37 # define SIZE_MAX ((size_t) -1) | |
38 #endif | |
39 | |
40 | |
41 #if HAVE_ICONV | |
42 | |
43 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion | |
44 error occurs, we may have to determine the Unicode representation of the | |
45 inconvertible character. */ | |
46 | |
47 /* iconv_carefully is like iconv, except that it stops as soon as it encounters | |
48 a conversion error, and it returns in *INCREMENTED a boolean telling whether | |
49 it has incremented the input pointers past the error location. */ | |
50 # if !defined _LIBICONV_VERSION && !defined __GLIBC__ | |
51 /* Irix iconv() inserts a NUL byte if it cannot convert. | |
52 NetBSD iconv() inserts a question mark if it cannot convert. | |
53 Only GNU libiconv and GNU libc are known to prefer to fail rather | |
54 than doing a lossy conversion. */ | |
55 static size_t | |
56 iconv_carefully (iconv_t cd, | |
57 const char **inbuf, size_t *inbytesleft, | |
58 char **outbuf, size_t *outbytesleft, | |
59 bool *incremented) | |
60 { | |
61 const char *inptr = *inbuf; | |
62 const char *inptr_end = inptr + *inbytesleft; | |
63 char *outptr = *outbuf; | |
64 size_t outsize = *outbytesleft; | |
65 const char *inptr_before; | |
66 size_t res; | |
67 | |
68 do | |
69 { | |
70 size_t insize; | |
71 | |
72 inptr_before = inptr; | |
73 res = (size_t)(-1); | |
74 | |
75 for (insize = 1; inptr + insize <= inptr_end; insize++) | |
76 { | |
77 res = iconv (cd, | |
78 (ICONV_CONST char **) &inptr, &insize, | |
79 &outptr, &outsize); | |
80 if (!(res == (size_t)(-1) && errno == EINVAL)) | |
81 break; | |
10406
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
82 /* iconv can eat up a shift sequence but give EINVAL while attempting |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
83 to convert the first character. E.g. libiconv does this. */ |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
84 if (inptr > inptr_before) |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
85 { |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
86 res = 0; |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
87 break; |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
88 } |
7882 | 89 } |
90 | |
91 if (res == 0) | |
92 { | |
93 *outbuf = outptr; | |
94 *outbytesleft = outsize; | |
95 } | |
96 } | |
97 while (res == 0 && inptr < inptr_end); | |
98 | |
99 *inbuf = inptr; | |
100 *inbytesleft = inptr_end - inptr; | |
101 if (res != (size_t)(-1) && res > 0) | |
102 { | |
103 /* iconv() has already incremented INPTR. We cannot go back to a | |
104 previous INPTR, otherwise the state inside CD would become invalid, | |
105 if FROM_CODESET is a stateful encoding. So, tell the caller that | |
106 *INBUF has already been incremented. */ | |
107 *incremented = (inptr > inptr_before); | |
108 errno = EILSEQ; | |
109 return (size_t)(-1); | |
110 } | |
111 else | |
112 { | |
113 *incremented = false; | |
114 return res; | |
115 } | |
116 } | |
117 # else | |
118 # define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \ | |
119 (*(incremented) = false, \ | |
120 iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft)) | |
121 # endif | |
122 | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
123 /* iconv_carefully_1 is like iconv_carefully, except that it stops after |
10406
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
124 converting one character or one shift sequence. */ |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
125 static size_t |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
126 iconv_carefully_1 (iconv_t cd, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
127 const char **inbuf, size_t *inbytesleft, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
128 char **outbuf, size_t *outbytesleft, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
129 bool *incremented) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
130 { |
10406
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
131 const char *inptr_before = *inbuf; |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
132 const char *inptr = inptr_before; |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
133 const char *inptr_end = inptr_before + *inbytesleft; |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
134 char *outptr = *outbuf; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
135 size_t outsize = *outbytesleft; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
136 size_t res = (size_t)(-1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
137 size_t insize; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
138 |
10406
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
139 for (insize = 1; inptr_before + insize <= inptr_end; insize++) |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
140 { |
10406
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
141 inptr = inptr_before; |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
142 res = iconv (cd, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
143 (ICONV_CONST char **) &inptr, &insize, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
144 &outptr, &outsize); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
145 if (!(res == (size_t)(-1) && errno == EINVAL)) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
146 break; |
10406
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
147 /* iconv can eat up a shift sequence but give EINVAL while attempting |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
148 to convert the first character. E.g. libiconv does this. */ |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
149 if (inptr > inptr_before) |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
150 { |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
151 res = 0; |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
152 break; |
5991da96e696
Make striconveh work better with stateful encodings.
Bruno Haible <bruno@clisp.org>
parents:
9836
diff
changeset
|
153 } |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
154 } |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
155 |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
156 *inbuf = inptr; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
157 *inbytesleft = inptr_end - inptr; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
158 # if !defined _LIBICONV_VERSION && !defined __GLIBC__ |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
159 /* Irix iconv() inserts a NUL byte if it cannot convert. |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
160 NetBSD iconv() inserts a question mark if it cannot convert. |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
161 Only GNU libiconv and GNU libc are known to prefer to fail rather |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
162 than doing a lossy conversion. */ |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
163 if (res != (size_t)(-1) && res > 0) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
164 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
165 /* iconv() has already incremented INPTR. We cannot go back to a |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
166 previous INPTR, otherwise the state inside CD would become invalid, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
167 if FROM_CODESET is a stateful encoding. So, tell the caller that |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
168 *INBUF has already been incremented. */ |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
169 *incremented = (inptr > inptr_before); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
170 errno = EILSEQ; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
171 return (size_t)(-1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
172 } |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
173 # endif |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
174 |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
175 if (res != (size_t)(-1)) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
176 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
177 *outbuf = outptr; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
178 *outbytesleft = outsize; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
179 } |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
180 *incremented = false; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
181 return res; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
182 } |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
183 |
8577 | 184 /* utf8conv_carefully is like iconv, except that |
185 - it converts from UTF-8 to UTF-8, | |
186 - it stops as soon as it encounters a conversion error, and it returns | |
187 in *INCREMENTED a boolean telling whether it has incremented the input | |
188 pointers past the error location, | |
189 - if one_character_only is true, it stops after converting one | |
190 character. */ | |
191 static size_t | |
192 utf8conv_carefully (bool one_character_only, | |
193 const char **inbuf, size_t *inbytesleft, | |
194 char **outbuf, size_t *outbytesleft, | |
195 bool *incremented) | |
196 { | |
197 const char *inptr = *inbuf; | |
198 size_t insize = *inbytesleft; | |
199 char *outptr = *outbuf; | |
200 size_t outsize = *outbytesleft; | |
201 size_t res; | |
202 | |
203 res = 0; | |
204 do | |
205 { | |
206 ucs4_t uc; | |
207 int n; | |
208 int m; | |
209 | |
210 n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize); | |
211 if (n < 0) | |
212 { | |
213 errno = (n == -2 ? EINVAL : EILSEQ); | |
214 n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize); | |
215 inptr += n; | |
216 insize -= n; | |
217 res = (size_t)(-1); | |
218 *incremented = true; | |
219 break; | |
220 } | |
221 if (outsize == 0) | |
222 { | |
223 errno = E2BIG; | |
224 res = (size_t)(-1); | |
225 *incremented = false; | |
226 break; | |
227 } | |
228 m = u8_uctomb ((uint8_t *) outptr, uc, outsize); | |
229 if (m == -2) | |
230 { | |
231 errno = E2BIG; | |
232 res = (size_t)(-1); | |
233 *incremented = false; | |
234 break; | |
235 } | |
236 inptr += n; | |
237 insize -= n; | |
238 if (m == -1) | |
239 { | |
240 errno = EILSEQ; | |
241 res = (size_t)(-1); | |
242 *incremented = true; | |
243 break; | |
244 } | |
245 outptr += m; | |
246 outsize -= m; | |
247 } | |
248 while (!one_character_only && insize > 0); | |
249 | |
250 *inbuf = inptr; | |
251 *inbytesleft = insize; | |
252 *outbuf = outptr; | |
253 *outbytesleft = outsize; | |
254 return res; | |
255 } | |
256 | |
7882 | 257 static int |
258 mem_cd_iconveh_internal (const char *src, size_t srclen, | |
259 iconv_t cd, iconv_t cd1, iconv_t cd2, | |
260 enum iconv_ilseq_handler handler, | |
261 size_t extra_alloc, | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
262 size_t *offsets, |
7882 | 263 char **resultp, size_t *lengthp) |
264 { | |
265 /* When a conversion error occurs, we cannot start using CD1 and CD2 at | |
266 this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR. | |
267 Instead, we have to start afresh from the beginning of SRC. */ | |
268 /* Use a temporary buffer, so that for small strings, a single malloc() | |
269 call will be sufficient. */ | |
270 # define tmpbufsize 4096 | |
271 /* The alignment is needed when converting e.g. to glibc's WCHAR_T or | |
272 libiconv's UCS-4-INTERNAL encoding. */ | |
273 union { unsigned int align; char buf[tmpbufsize]; } tmp; | |
274 # define tmpbuf tmp.buf | |
275 | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
276 char *initial_result; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
277 char *result; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
278 size_t allocated; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
279 size_t length; |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
280 size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */ |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
281 |
7943 | 282 if (*resultp != NULL && *lengthp >= sizeof (tmpbuf)) |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
283 { |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
284 initial_result = *resultp; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
285 allocated = *lengthp; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
286 } |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
287 else |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
288 { |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
289 initial_result = tmpbuf; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
290 allocated = sizeof (tmpbuf); |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
291 } |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
292 result = initial_result; |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
293 |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
294 /* Test whether a direct conversion is possible at all. */ |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
295 if (cd == (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
296 goto indirectly; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
297 |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
298 if (offsets != NULL) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
299 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
300 size_t i; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
301 |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
302 for (i = 0; i < srclen; i++) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
303 offsets[i] = (size_t)(-1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
304 |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
305 last_length = (size_t)(-1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
306 } |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
307 length = 0; |
7882 | 308 |
309 /* First, try a direct conversion, and see whether a conversion error | |
310 occurs at all. */ | |
311 { | |
312 const char *inptr = src; | |
313 size_t insize = srclen; | |
314 | |
315 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ | |
316 # if defined _LIBICONV_VERSION \ | |
317 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) | |
318 /* Set to the initial state. */ | |
319 iconv (cd, NULL, NULL, NULL, NULL); | |
320 # endif | |
321 | |
322 while (insize > 0) | |
323 { | |
324 char *outptr = result + length; | |
325 size_t outsize = allocated - extra_alloc - length; | |
326 bool incremented; | |
327 size_t res; | |
328 bool grow; | |
329 | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
330 if (offsets != NULL) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
331 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
332 if (length != last_length) /* ensure that offset[] be increasing */ |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
333 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
334 offsets[inptr - src] = length; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
335 last_length = length; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
336 } |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
337 res = iconv_carefully_1 (cd, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
338 &inptr, &insize, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
339 &outptr, &outsize, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
340 &incremented); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
341 } |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
342 else |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
343 /* Use iconv_carefully instead of iconv here, because: |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
344 - If TO_CODESET is UTF-8, we can do the error handling in this |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
345 loop, no need for a second loop, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
346 - With iconv() implementations other than GNU libiconv and GNU |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
347 libc, if we use iconv() in a big swoop, checking for an E2BIG |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
348 return, we lose the number of irreversible conversions. */ |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
349 res = iconv_carefully (cd, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
350 &inptr, &insize, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
351 &outptr, &outsize, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
352 &incremented); |
7882 | 353 |
354 length = outptr - result; | |
355 grow = (length + extra_alloc > allocated / 2); | |
356 if (res == (size_t)(-1)) | |
357 { | |
358 if (errno == E2BIG) | |
359 grow = true; | |
360 else if (errno == EINVAL) | |
361 break; | |
362 else if (errno == EILSEQ && handler != iconveh_error) | |
363 { | |
364 if (cd2 == (iconv_t)(-1)) | |
365 { | |
366 /* TO_CODESET is UTF-8. */ | |
367 /* Error handling can produce up to 1 byte of output. */ | |
368 if (length + 1 + extra_alloc > allocated) | |
369 { | |
370 char *memory; | |
371 | |
372 allocated = 2 * allocated; | |
373 if (length + 1 + extra_alloc > allocated) | |
374 abort (); | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
375 if (result == initial_result) |
7882 | 376 memory = (char *) malloc (allocated); |
377 else | |
378 memory = (char *) realloc (result, allocated); | |
379 if (memory == NULL) | |
380 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
381 if (result != initial_result) |
7882 | 382 free (result); |
383 errno = ENOMEM; | |
384 return -1; | |
385 } | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
386 if (result == initial_result) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
387 memcpy (memory, initial_result, length); |
7882 | 388 result = memory; |
389 grow = false; | |
390 } | |
391 /* The input is invalid in FROM_CODESET. Eat up one byte | |
392 and emit a question mark. */ | |
393 if (!incremented) | |
394 { | |
395 if (insize == 0) | |
396 abort (); | |
397 inptr++; | |
398 insize--; | |
399 } | |
400 result[length] = '?'; | |
401 length++; | |
402 } | |
403 else | |
404 goto indirectly; | |
405 } | |
406 else | |
407 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
408 if (result != initial_result) |
7882 | 409 { |
410 int saved_errno = errno; | |
411 free (result); | |
412 errno = saved_errno; | |
413 } | |
414 return -1; | |
415 } | |
416 } | |
417 if (insize == 0) | |
418 break; | |
419 if (grow) | |
420 { | |
421 char *memory; | |
422 | |
423 allocated = 2 * allocated; | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
424 if (result == initial_result) |
7882 | 425 memory = (char *) malloc (allocated); |
426 else | |
427 memory = (char *) realloc (result, allocated); | |
428 if (memory == NULL) | |
429 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
430 if (result != initial_result) |
7882 | 431 free (result); |
432 errno = ENOMEM; | |
433 return -1; | |
434 } | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
435 if (result == initial_result) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
436 memcpy (memory, initial_result, length); |
7882 | 437 result = memory; |
438 } | |
439 } | |
440 } | |
441 | |
442 /* Now get the conversion state back to the initial state. | |
443 But avoid glibc-2.1 bug and Solaris 2.7 bug. */ | |
444 #if defined _LIBICONV_VERSION \ | |
445 || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) | |
446 for (;;) | |
447 { | |
448 char *outptr = result + length; | |
449 size_t outsize = allocated - extra_alloc - length; | |
450 size_t res; | |
451 | |
452 res = iconv (cd, NULL, NULL, &outptr, &outsize); | |
453 length = outptr - result; | |
454 if (res == (size_t)(-1)) | |
455 { | |
456 if (errno == E2BIG) | |
457 { | |
458 char *memory; | |
459 | |
460 allocated = 2 * allocated; | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
461 if (result == initial_result) |
7882 | 462 memory = (char *) malloc (allocated); |
463 else | |
464 memory = (char *) realloc (result, allocated); | |
465 if (memory == NULL) | |
466 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
467 if (result != initial_result) |
7882 | 468 free (result); |
469 errno = ENOMEM; | |
470 return -1; | |
471 } | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
472 if (result == initial_result) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
473 memcpy (memory, initial_result, length); |
7882 | 474 result = memory; |
475 } | |
476 else | |
477 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
478 if (result != initial_result) |
7882 | 479 { |
480 int saved_errno = errno; | |
481 free (result); | |
482 errno = saved_errno; | |
483 } | |
484 return -1; | |
485 } | |
486 } | |
487 else | |
488 break; | |
489 } | |
490 #endif | |
491 | |
492 /* The direct conversion succeeded. */ | |
493 goto done; | |
494 | |
495 indirectly: | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
496 /* The direct conversion failed. |
7882 | 497 Use a conversion through UTF-8. */ |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
498 if (offsets != NULL) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
499 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
500 size_t i; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
501 |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
502 for (i = 0; i < srclen; i++) |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
503 offsets[i] = (size_t)(-1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
504 |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
505 last_length = (size_t)(-1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
506 } |
7882 | 507 length = 0; |
508 { | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
509 const bool slowly = (offsets != NULL || handler == iconveh_error); |
7882 | 510 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ |
511 char utf8buf[utf8bufsize + 1]; | |
512 size_t utf8len = 0; | |
513 const char *in1ptr = src; | |
514 size_t in1size = srclen; | |
515 bool do_final_flush1 = true; | |
516 bool do_final_flush2 = true; | |
517 | |
518 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ | |
519 # if defined _LIBICONV_VERSION \ | |
520 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) | |
521 /* Set to the initial state. */ | |
522 if (cd1 != (iconv_t)(-1)) | |
523 iconv (cd1, NULL, NULL, NULL, NULL); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
524 if (cd2 != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
525 iconv (cd2, NULL, NULL, NULL, NULL); |
7882 | 526 # endif |
527 | |
528 while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2) | |
529 { | |
530 char *out1ptr = utf8buf + utf8len; | |
531 size_t out1size = utf8bufsize - utf8len; | |
532 bool incremented1; | |
533 size_t res1; | |
534 int errno1; | |
535 | |
536 /* Conversion step 1: from FROM_CODESET to UTF-8. */ | |
537 if (in1size > 0) | |
538 { | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
539 if (offsets != NULL |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
540 && length != last_length) /* ensure that offset[] be increasing */ |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
541 { |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
542 offsets[in1ptr - src] = length; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
543 last_length = length; |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
544 } |
7882 | 545 if (cd1 != (iconv_t)(-1)) |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
546 { |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
547 if (slowly) |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
548 res1 = iconv_carefully_1 (cd1, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
549 &in1ptr, &in1size, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
550 &out1ptr, &out1size, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
551 &incremented1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
552 else |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
553 res1 = iconv_carefully (cd1, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
554 &in1ptr, &in1size, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
555 &out1ptr, &out1size, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
556 &incremented1); |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
557 } |
7882 | 558 else |
559 { | |
560 /* FROM_CODESET is UTF-8. */ | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
561 res1 = utf8conv_carefully (slowly, |
8577 | 562 &in1ptr, &in1size, |
563 &out1ptr, &out1size, | |
564 &incremented1); | |
7882 | 565 } |
566 } | |
567 else if (do_final_flush1) | |
568 { | |
569 /* Now get the conversion state of CD1 back to the initial state. | |
570 But avoid glibc-2.1 bug and Solaris 2.7 bug. */ | |
571 # if defined _LIBICONV_VERSION \ | |
572 || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) | |
573 if (cd1 != (iconv_t)(-1)) | |
574 res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size); | |
575 else | |
576 # endif | |
577 res1 = 0; | |
578 do_final_flush1 = false; | |
579 incremented1 = true; | |
580 } | |
581 else | |
582 { | |
583 res1 = 0; | |
584 incremented1 = true; | |
585 } | |
586 if (res1 == (size_t)(-1) | |
587 && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ)) | |
588 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
589 if (result != initial_result) |
7882 | 590 { |
591 int saved_errno = errno; | |
592 free (result); | |
593 errno = saved_errno; | |
594 } | |
595 return -1; | |
596 } | |
597 if (res1 == (size_t)(-1) | |
598 && errno == EILSEQ && handler != iconveh_error) | |
599 { | |
600 /* The input is invalid in FROM_CODESET. Eat up one byte and | |
601 emit a question mark. Room for the question mark was allocated | |
602 at the end of utf8buf. */ | |
603 if (!incremented1) | |
604 { | |
605 if (in1size == 0) | |
606 abort (); | |
607 in1ptr++; | |
608 in1size--; | |
609 } | |
610 utf8buf[utf8len++] = '?'; | |
611 } | |
612 errno1 = errno; | |
613 utf8len = out1ptr - utf8buf; | |
614 | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
615 if (offsets != NULL |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
616 || in1size == 0 |
7882 | 617 || utf8len > utf8bufsize / 2 |
618 || (res1 == (size_t)(-1) && errno1 == E2BIG)) | |
619 { | |
620 /* Conversion step 2: from UTF-8 to TO_CODESET. */ | |
621 const char *in2ptr = utf8buf; | |
622 size_t in2size = utf8len; | |
623 | |
624 while (in2size > 0 | |
625 || (in1size == 0 && !do_final_flush1 && do_final_flush2)) | |
626 { | |
627 char *out2ptr = result + length; | |
628 size_t out2size = allocated - extra_alloc - length; | |
629 bool incremented2; | |
630 size_t res2; | |
631 bool grow; | |
632 | |
633 if (in2size > 0) | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
634 { |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
635 if (cd2 != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
636 res2 = iconv_carefully (cd2, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
637 &in2ptr, &in2size, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
638 &out2ptr, &out2size, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
639 &incremented2); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
640 else |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
641 /* TO_CODESET is UTF-8. */ |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
642 res2 = utf8conv_carefully (false, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
643 &in2ptr, &in2size, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
644 &out2ptr, &out2size, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
645 &incremented2); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
646 } |
7882 | 647 else /* in1size == 0 && !do_final_flush1 |
648 && in2size == 0 && do_final_flush2 */ | |
649 { | |
650 /* Now get the conversion state of CD1 back to the initial | |
651 state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */ | |
652 # if defined _LIBICONV_VERSION \ | |
653 || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
654 if (cd2 != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
655 res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
656 else |
7882 | 657 # endif |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
658 res2 = 0; |
7882 | 659 do_final_flush2 = false; |
660 incremented2 = true; | |
661 } | |
662 | |
663 length = out2ptr - result; | |
664 grow = (length + extra_alloc > allocated / 2); | |
665 if (res2 == (size_t)(-1)) | |
666 { | |
667 if (errno == E2BIG) | |
668 grow = true; | |
669 else if (errno == EINVAL) | |
670 break; | |
671 else if (errno == EILSEQ && handler != iconveh_error) | |
672 { | |
673 /* Error handling can produce up to 10 bytes of ASCII | |
674 output. But TO_CODESET may be UCS-2, UTF-16 or | |
675 UCS-4, so use CD2 here as well. */ | |
676 char scratchbuf[10]; | |
677 size_t scratchlen; | |
678 ucs4_t uc; | |
679 const char *inptr; | |
680 size_t insize; | |
681 size_t res; | |
682 | |
683 if (incremented2) | |
684 { | |
685 if (u8_prev (&uc, (const uint8_t *) in2ptr, | |
686 (const uint8_t *) utf8buf) | |
687 == NULL) | |
688 abort (); | |
689 } | |
690 else | |
691 { | |
692 int n; | |
693 if (in2size == 0) | |
694 abort (); | |
7995
9730f3f14f9d
Rename u8_mbtouc -> u8_mbtouc_unsafe.
Bruno Haible <bruno@clisp.org>
parents:
7944
diff
changeset
|
695 n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr, |
9730f3f14f9d
Rename u8_mbtouc -> u8_mbtouc_unsafe.
Bruno Haible <bruno@clisp.org>
parents:
7944
diff
changeset
|
696 in2size); |
7882 | 697 in2ptr += n; |
698 in2size -= n; | |
699 } | |
700 | |
701 if (handler == iconveh_escape_sequence) | |
702 { | |
703 static char hex[16] = "0123456789ABCDEF"; | |
704 scratchlen = 0; | |
705 scratchbuf[scratchlen++] = '\\'; | |
706 if (uc < 0x10000) | |
707 scratchbuf[scratchlen++] = 'u'; | |
708 else | |
709 { | |
710 scratchbuf[scratchlen++] = 'U'; | |
711 scratchbuf[scratchlen++] = hex[(uc>>28) & 15]; | |
712 scratchbuf[scratchlen++] = hex[(uc>>24) & 15]; | |
713 scratchbuf[scratchlen++] = hex[(uc>>20) & 15]; | |
714 scratchbuf[scratchlen++] = hex[(uc>>16) & 15]; | |
715 } | |
716 scratchbuf[scratchlen++] = hex[(uc>>12) & 15]; | |
717 scratchbuf[scratchlen++] = hex[(uc>>8) & 15]; | |
718 scratchbuf[scratchlen++] = hex[(uc>>4) & 15]; | |
719 scratchbuf[scratchlen++] = hex[uc & 15]; | |
720 } | |
721 else | |
722 { | |
723 scratchbuf[0] = '?'; | |
724 scratchlen = 1; | |
725 } | |
726 | |
727 inptr = scratchbuf; | |
728 insize = scratchlen; | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
729 if (cd2 != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
730 res = iconv (cd2, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
731 (ICONV_CONST char **) &inptr, &insize, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
732 &out2ptr, &out2size); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
733 else |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
734 { |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
735 /* TO_CODESET is UTF-8. */ |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
736 if (out2size >= insize) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
737 { |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
738 memcpy (out2ptr, inptr, insize); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
739 out2ptr += insize; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
740 out2size -= insize; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
741 inptr += insize; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
742 insize = 0; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
743 res = 0; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
744 } |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
745 else |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
746 { |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
747 errno = E2BIG; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
748 res = (size_t)(-1); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
749 } |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
750 } |
7882 | 751 length = out2ptr - result; |
752 if (res == (size_t)(-1) && errno == E2BIG) | |
753 { | |
754 char *memory; | |
755 | |
756 allocated = 2 * allocated; | |
757 if (length + 1 + extra_alloc > allocated) | |
758 abort (); | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
759 if (result == initial_result) |
7882 | 760 memory = (char *) malloc (allocated); |
761 else | |
762 memory = (char *) realloc (result, allocated); | |
763 if (memory == NULL) | |
764 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
765 if (result != initial_result) |
7882 | 766 free (result); |
767 errno = ENOMEM; | |
768 return -1; | |
769 } | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
770 if (result == initial_result) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
771 memcpy (memory, initial_result, length); |
7882 | 772 result = memory; |
773 grow = false; | |
774 | |
775 out2ptr = result + length; | |
776 out2size = allocated - extra_alloc - length; | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
777 if (cd2 != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
778 res = iconv (cd2, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
779 (ICONV_CONST char **) &inptr, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
780 &insize, |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
781 &out2ptr, &out2size); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
782 else |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
783 { |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
784 /* TO_CODESET is UTF-8. */ |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
785 if (!(out2size >= insize)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
786 abort (); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
787 memcpy (out2ptr, inptr, insize); |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
788 out2ptr += insize; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
789 out2size -= insize; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
790 inptr += insize; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
791 insize = 0; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
792 res = 0; |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
793 } |
7882 | 794 length = out2ptr - result; |
795 } | |
796 # if !defined _LIBICONV_VERSION && !defined __GLIBC__ | |
797 /* Irix iconv() inserts a NUL byte if it cannot convert. | |
798 NetBSD iconv() inserts a question mark if it cannot | |
799 convert. | |
800 Only GNU libiconv and GNU libc are known to prefer | |
801 to fail rather than doing a lossy conversion. */ | |
802 if (res != (size_t)(-1) && res > 0) | |
803 { | |
804 errno = EILSEQ; | |
805 res = (size_t)(-1); | |
806 } | |
807 # endif | |
808 if (res == (size_t)(-1)) | |
809 { | |
810 /* Failure converting the ASCII replacement. */ | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
811 if (result != initial_result) |
7882 | 812 { |
813 int saved_errno = errno; | |
814 free (result); | |
815 errno = saved_errno; | |
816 } | |
817 return -1; | |
818 } | |
819 } | |
820 else | |
821 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
822 if (result != initial_result) |
7882 | 823 { |
824 int saved_errno = errno; | |
825 free (result); | |
826 errno = saved_errno; | |
827 } | |
828 return -1; | |
829 } | |
830 } | |
831 if (!(in2size > 0 | |
832 || (in1size == 0 && !do_final_flush1 && do_final_flush2))) | |
833 break; | |
834 if (grow) | |
835 { | |
836 char *memory; | |
837 | |
838 allocated = 2 * allocated; | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
839 if (result == initial_result) |
7882 | 840 memory = (char *) malloc (allocated); |
841 else | |
842 memory = (char *) realloc (result, allocated); | |
843 if (memory == NULL) | |
844 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
845 if (result != initial_result) |
7882 | 846 free (result); |
847 errno = ENOMEM; | |
848 return -1; | |
849 } | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
850 if (result == initial_result) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
851 memcpy (memory, initial_result, length); |
7882 | 852 result = memory; |
853 } | |
854 } | |
855 | |
856 /* Move the remaining bytes to the beginning of utf8buf. */ | |
857 if (in2size > 0) | |
858 memmove (utf8buf, in2ptr, in2size); | |
859 utf8len = in2size; | |
860 } | |
861 | |
862 if (res1 == (size_t)(-1)) | |
863 { | |
864 if (errno1 == EINVAL) | |
865 in1size = 0; | |
866 else if (errno1 == EILSEQ) | |
867 { | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
868 if (result != initial_result) |
7882 | 869 free (result); |
870 errno = errno1; | |
871 return -1; | |
872 } | |
873 } | |
874 } | |
875 # undef utf8bufsize | |
876 } | |
877 | |
878 done: | |
879 /* Now the final memory allocation. */ | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
880 if (result == tmpbuf) |
7882 | 881 { |
9835
69955b112e2e
Avoid failure when attempting to return empty iconv results on some platforms.
Bruno Haible <bruno@clisp.org>
parents:
9309
diff
changeset
|
882 size_t memsize = length + extra_alloc; |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
883 char *memory; |
7882 | 884 |
9835
69955b112e2e
Avoid failure when attempting to return empty iconv results on some platforms.
Bruno Haible <bruno@clisp.org>
parents:
9309
diff
changeset
|
885 memory = (char *) malloc (memsize > 0 ? memsize : 1); |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
886 if (memory != NULL) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
887 { |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
888 memcpy (memory, tmpbuf, length); |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
889 result = memory; |
7882 | 890 } |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
891 else |
7882 | 892 { |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
893 errno = ENOMEM; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
894 return -1; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
895 } |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
896 } |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
897 else if (result != *resultp && length + extra_alloc < allocated) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
898 { |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
899 /* Shrink the allocated memory if possible. */ |
9835
69955b112e2e
Avoid failure when attempting to return empty iconv results on some platforms.
Bruno Haible <bruno@clisp.org>
parents:
9309
diff
changeset
|
900 size_t memsize = length + extra_alloc; |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
901 char *memory; |
7882 | 902 |
9835
69955b112e2e
Avoid failure when attempting to return empty iconv results on some platforms.
Bruno Haible <bruno@clisp.org>
parents:
9309
diff
changeset
|
903 memory = (char *) realloc (result, memsize > 0 ? memsize : 1); |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
904 if (memory != NULL) |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
905 result = memory; |
7882 | 906 } |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
907 *resultp = result; |
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
908 *lengthp = length; |
7882 | 909 return 0; |
910 # undef tmpbuf | |
911 # undef tmpbufsize | |
912 } | |
913 | |
914 int | |
915 mem_cd_iconveh (const char *src, size_t srclen, | |
916 iconv_t cd, iconv_t cd1, iconv_t cd2, | |
917 enum iconv_ilseq_handler handler, | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
918 size_t *offsets, |
7882 | 919 char **resultp, size_t *lengthp) |
920 { | |
921 return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0, | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
922 offsets, resultp, lengthp); |
7882 | 923 } |
924 | |
925 char * | |
926 str_cd_iconveh (const char *src, | |
927 iconv_t cd, iconv_t cd1, iconv_t cd2, | |
928 enum iconv_ilseq_handler handler) | |
929 { | |
930 /* For most encodings, a trailing NUL byte in the input will be converted | |
931 to a trailing NUL byte in the output. But not for UTF-7. So that this | |
932 function is usable for UTF-7, we have to exclude the NUL byte from the | |
933 conversion and add it by hand afterwards. */ | |
934 char *result = NULL; | |
7914
9862bb358a30
Change specification of mem_cd_iconveh.
Bruno Haible <bruno@clisp.org>
parents:
7882
diff
changeset
|
935 size_t length = 0; |
7882 | 936 int retval = mem_cd_iconveh_internal (src, strlen (src), |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
937 cd, cd1, cd2, handler, 1, NULL, |
7882 | 938 &result, &length); |
939 | |
940 if (retval < 0) | |
941 { | |
942 if (result != NULL) | |
943 { | |
944 int saved_errno = errno; | |
945 free (result); | |
946 errno = saved_errno; | |
947 } | |
948 return NULL; | |
949 } | |
950 | |
951 /* Add the terminating NUL byte. */ | |
952 result[length] = '\0'; | |
953 | |
954 return result; | |
955 } | |
956 | |
957 #endif | |
958 | |
7915 | 959 int |
960 mem_iconveh (const char *src, size_t srclen, | |
961 const char *from_codeset, const char *to_codeset, | |
962 enum iconv_ilseq_handler handler, | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
963 size_t *offsets, |
7915 | 964 char **resultp, size_t *lengthp) |
965 { | |
7919 | 966 if (srclen == 0) |
967 { | |
968 /* Nothing to convert. */ | |
969 *lengthp = 0; | |
970 return 0; | |
971 } | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
972 else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0) |
7915 | 973 { |
974 char *result; | |
975 | |
976 if (*resultp != NULL && *lengthp >= srclen) | |
977 result = *resultp; | |
978 else | |
979 { | |
980 result = (char *) malloc (srclen); | |
981 if (result == NULL) | |
982 { | |
983 errno = ENOMEM; | |
984 return -1; | |
985 } | |
986 } | |
987 memcpy (result, src, srclen); | |
988 *resultp = result; | |
989 *lengthp = srclen; | |
990 return 0; | |
991 } | |
992 else | |
993 { | |
994 #if HAVE_ICONV | |
995 iconv_t cd; | |
996 iconv_t cd1; | |
997 iconv_t cd2; | |
998 char *result; | |
999 size_t length; | |
1000 int retval; | |
1001 | |
1002 /* Avoid glibc-2.1 bug with EUC-KR. */ | |
1003 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION | |
1004 if (c_strcasecmp (from_codeset, "EUC-KR") == 0 | |
1005 || c_strcasecmp (to_codeset, "EUC-KR") == 0) | |
1006 { | |
1007 errno = EINVAL; | |
1008 return -1; | |
1009 } | |
1010 # endif | |
1011 | |
1012 cd = iconv_open (to_codeset, from_codeset); | |
1013 | |
7936
b93a4036490a
Optimize the recognition of "UTF-8" strings.
Bruno Haible <bruno@clisp.org>
parents:
7927
diff
changeset
|
1014 if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) |
7915 | 1015 cd1 = (iconv_t)(-1); |
1016 else | |
1017 { | |
1018 cd1 = iconv_open ("UTF-8", from_codeset); | |
1019 if (cd1 == (iconv_t)(-1)) | |
1020 { | |
1021 int saved_errno = errno; | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1022 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1023 iconv_close (cd); |
7915 | 1024 errno = saved_errno; |
1025 return -1; | |
1026 } | |
1027 } | |
1028 | |
9836
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1029 if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0) |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1030 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1031 || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0 |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1032 # endif |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1033 ) |
7915 | 1034 cd2 = (iconv_t)(-1); |
1035 else | |
1036 { | |
1037 cd2 = iconv_open (to_codeset, "UTF-8"); | |
1038 if (cd2 == (iconv_t)(-1)) | |
1039 { | |
1040 int saved_errno = errno; | |
1041 if (cd1 != (iconv_t)(-1)) | |
1042 iconv_close (cd1); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1043 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1044 iconv_close (cd); |
7915 | 1045 errno = saved_errno; |
1046 return -1; | |
1047 } | |
1048 } | |
1049 | |
1050 result = *resultp; | |
1051 length = *lengthp; | |
7927
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
1052 retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets, |
7ebab05df4f6
Add optional offsets argument to conversion routines.
Bruno Haible <bruno@clisp.org>
parents:
7919
diff
changeset
|
1053 &result, &length); |
7915 | 1054 |
1055 if (retval < 0) | |
1056 { | |
1057 /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */ | |
1058 int saved_errno = errno; | |
1059 if (cd2 != (iconv_t)(-1)) | |
1060 iconv_close (cd2); | |
1061 if (cd1 != (iconv_t)(-1)) | |
1062 iconv_close (cd1); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1063 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1064 iconv_close (cd); |
7915 | 1065 errno = saved_errno; |
1066 } | |
1067 else | |
1068 { | |
1069 if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0) | |
1070 { | |
1071 /* Return -1, but free the allocated memory, and while doing | |
1072 that, preserve the errno from iconv_close. */ | |
1073 int saved_errno = errno; | |
1074 if (cd1 != (iconv_t)(-1)) | |
1075 iconv_close (cd1); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1076 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1077 iconv_close (cd); |
7915 | 1078 if (result != *resultp && result != NULL) |
1079 free (result); | |
1080 errno = saved_errno; | |
1081 return -1; | |
1082 } | |
1083 if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0) | |
1084 { | |
1085 /* Return -1, but free the allocated memory, and while doing | |
1086 that, preserve the errno from iconv_close. */ | |
1087 int saved_errno = errno; | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1088 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1089 iconv_close (cd); |
7915 | 1090 if (result != *resultp && result != NULL) |
1091 free (result); | |
1092 errno = saved_errno; | |
1093 return -1; | |
1094 } | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1095 if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) |
7915 | 1096 { |
1097 /* Return -1, but free the allocated memory, and while doing | |
1098 that, preserve the errno from iconv_close. */ | |
1099 int saved_errno = errno; | |
1100 if (result != *resultp && result != NULL) | |
1101 free (result); | |
1102 errno = saved_errno; | |
1103 return -1; | |
1104 } | |
1105 *resultp = result; | |
1106 *lengthp = length; | |
1107 } | |
1108 return retval; | |
1109 #else | |
1110 /* This is a different error code than if iconv_open existed but didn't | |
1111 support from_codeset and to_codeset, so that the caller can emit | |
1112 an error message such as | |
1113 "iconv() is not supported. Installing GNU libiconv and | |
1114 then reinstalling this package would fix this." */ | |
1115 errno = ENOSYS; | |
1116 return -1; | |
1117 #endif | |
1118 } | |
1119 } | |
1120 | |
7882 | 1121 char * |
1122 str_iconveh (const char *src, | |
1123 const char *from_codeset, const char *to_codeset, | |
1124 enum iconv_ilseq_handler handler) | |
1125 { | |
7919 | 1126 if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) |
7917
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1127 { |
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1128 char *result = strdup (src); |
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1129 |
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1130 if (result == NULL) |
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1131 errno = ENOMEM; |
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1132 return result; |
0b6b5a675452
Ensure errno when strdup fails.
Bruno Haible <bruno@clisp.org>
parents:
7915
diff
changeset
|
1133 } |
7882 | 1134 else |
1135 { | |
1136 #if HAVE_ICONV | |
1137 iconv_t cd; | |
1138 iconv_t cd1; | |
1139 iconv_t cd2; | |
1140 char *result; | |
1141 | |
1142 /* Avoid glibc-2.1 bug with EUC-KR. */ | |
1143 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION | |
1144 if (c_strcasecmp (from_codeset, "EUC-KR") == 0 | |
1145 || c_strcasecmp (to_codeset, "EUC-KR") == 0) | |
1146 { | |
1147 errno = EINVAL; | |
1148 return NULL; | |
1149 } | |
1150 # endif | |
1151 | |
1152 cd = iconv_open (to_codeset, from_codeset); | |
1153 | |
7936
b93a4036490a
Optimize the recognition of "UTF-8" strings.
Bruno Haible <bruno@clisp.org>
parents:
7927
diff
changeset
|
1154 if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) |
7882 | 1155 cd1 = (iconv_t)(-1); |
1156 else | |
1157 { | |
1158 cd1 = iconv_open ("UTF-8", from_codeset); | |
1159 if (cd1 == (iconv_t)(-1)) | |
1160 { | |
1161 int saved_errno = errno; | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1162 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1163 iconv_close (cd); |
7882 | 1164 errno = saved_errno; |
1165 return NULL; | |
1166 } | |
1167 } | |
1168 | |
9836
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1169 if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0) |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1170 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1171 || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0 |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1172 # endif |
69507e40ea4f
Optimize conversion from UTF-8 to UTF-8//TRANSLIT.
Bruno Haible <bruno@clisp.org>
parents:
9835
diff
changeset
|
1173 ) |
7882 | 1174 cd2 = (iconv_t)(-1); |
1175 else | |
1176 { | |
1177 cd2 = iconv_open (to_codeset, "UTF-8"); | |
1178 if (cd2 == (iconv_t)(-1)) | |
1179 { | |
1180 int saved_errno = errno; | |
1181 if (cd1 != (iconv_t)(-1)) | |
1182 iconv_close (cd1); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1183 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1184 iconv_close (cd); |
7882 | 1185 errno = saved_errno; |
1186 return NULL; | |
1187 } | |
1188 } | |
1189 | |
1190 result = str_cd_iconveh (src, cd, cd1, cd2, handler); | |
1191 | |
1192 if (result == NULL) | |
1193 { | |
1194 /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */ | |
1195 int saved_errno = errno; | |
1196 if (cd2 != (iconv_t)(-1)) | |
1197 iconv_close (cd2); | |
1198 if (cd1 != (iconv_t)(-1)) | |
1199 iconv_close (cd1); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1200 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1201 iconv_close (cd); |
7882 | 1202 errno = saved_errno; |
1203 } | |
1204 else | |
1205 { | |
1206 if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0) | |
1207 { | |
1208 /* Return NULL, but free the allocated memory, and while doing | |
1209 that, preserve the errno from iconv_close. */ | |
1210 int saved_errno = errno; | |
1211 if (cd1 != (iconv_t)(-1)) | |
1212 iconv_close (cd1); | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1213 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1214 iconv_close (cd); |
7882 | 1215 free (result); |
1216 errno = saved_errno; | |
1217 return NULL; | |
1218 } | |
1219 if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0) | |
1220 { | |
1221 /* Return NULL, but free the allocated memory, and while doing | |
1222 that, preserve the errno from iconv_close. */ | |
1223 int saved_errno = errno; | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1224 if (cd != (iconv_t)(-1)) |
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1225 iconv_close (cd); |
7882 | 1226 free (result); |
1227 errno = saved_errno; | |
1228 return NULL; | |
1229 } | |
8605
414a0a3d5fac
Do an indirect conversion if iconv_open does not support a direct conversion.
Bruno Haible <bruno@clisp.org>
parents:
8577
diff
changeset
|
1230 if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) |
7882 | 1231 { |
1232 /* Return NULL, but free the allocated memory, and while doing | |
1233 that, preserve the errno from iconv_close. */ | |
1234 int saved_errno = errno; | |
1235 free (result); | |
1236 errno = saved_errno; | |
1237 return NULL; | |
1238 } | |
1239 } | |
1240 return result; | |
1241 #else | |
1242 /* This is a different error code than if iconv_open existed but didn't | |
1243 support from_codeset and to_codeset, so that the caller can emit | |
1244 an error message such as | |
1245 "iconv() is not supported. Installing GNU libiconv and | |
1246 then reinstalling this package would fix this." */ | |
1247 errno = ENOSYS; | |
1248 return NULL; | |
1249 #endif | |
1250 } | |
1251 } |