Mercurial > hg > octave-nkf > gnulib-hg
annotate lib/unicodeio.c @ 3843:2f0e1c3ba75b
*** empty log message ***
author | Jim Meyering <jim@meyering.net> |
---|---|
date | Wed, 22 May 2002 08:36:32 +0000 |
parents | f80e972daf0f |
children | 22d3032f0239 |
rev | line source |
---|---|
2305 | 1 /* Unicode character output to streams with locale dependent encoding. |
2 | |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
3 Copyright (C) 2000-2002 Free Software Foundation, Inc. |
2305 | 4 |
5 This program is free software; you can redistribute it and/or modify it | |
6 under the terms of the GNU Library General Public License as published | |
7 by the Free Software Foundation; either version 2, or (at your option) | |
8 any later version. | |
9 | |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 Library General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU Library General Public | |
16 License along with this program; if not, write to the Free Software | |
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
18 USA. */ | |
19 | |
20 /* Written by Bruno Haible <haible@clisp.cons.org>. */ | |
21 | |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
22 /* Note: This file requires the locale_charset() function. See in |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
23 libiconv-1.7/libcharset/INTEGRATE for how to obtain it. */ |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
24 |
2305 | 25 #ifdef HAVE_CONFIG_H |
26 # include <config.h> | |
27 #endif | |
28 | |
29 #if HAVE_STDDEF_H | |
30 # include <stddef.h> | |
31 #endif | |
32 | |
33 #include <stdio.h> | |
2352
84a00de8c81b
(<string.h>): Include only #if HAVE_STRING_H.
Jim Meyering <jim@meyering.net>
parents:
2351
diff
changeset
|
34 #if HAVE_STRING_H |
84a00de8c81b
(<string.h>): Include only #if HAVE_STRING_H.
Jim Meyering <jim@meyering.net>
parents:
2351
diff
changeset
|
35 # include <string.h> |
84a00de8c81b
(<string.h>): Include only #if HAVE_STRING_H.
Jim Meyering <jim@meyering.net>
parents:
2351
diff
changeset
|
36 #else |
84a00de8c81b
(<string.h>): Include only #if HAVE_STRING_H.
Jim Meyering <jim@meyering.net>
parents:
2351
diff
changeset
|
37 # include <strings.h> |
84a00de8c81b
(<string.h>): Include only #if HAVE_STRING_H.
Jim Meyering <jim@meyering.net>
parents:
2351
diff
changeset
|
38 #endif |
2305 | 39 |
2329 | 40 #include <errno.h> |
41 #ifndef errno | |
42 extern int errno; | |
43 #endif | |
2969 | 44 |
2305 | 45 #if HAVE_ICONV |
46 # include <iconv.h> | |
47 #endif | |
48 | |
49 #include <error.h> | |
50 | |
/* Message translation support.  When ENABLE_NLS is set, <libintl.h> is
   included and is expected to supply gettext(); otherwise gettext() is a
   macro that expands to its argument unchanged.  */
#if ENABLE_NLS
# include <libintl.h>
#else
# define gettext(Text) Text
#endif
/* _() passes a message through gettext().  */
#define _(Text) gettext (Text)
/* N_() expands to its argument unchanged.  Messages wrapped in N_() in this
   file are handed to the failure callback untranslated;
   exit_failure_callback applies gettext() to them before printing.  */
#define N_(Text) Text
2305 | 58 |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
59 /* Specification. */ |
2305 | 60 #include "unicodeio.h" |
61 | |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
62 /* When we pass a Unicode character to iconv(), we must pass it in a |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
63 suitable encoding. The standardized Unicode encodings are |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
64 UTF-8, UCS-2, UCS-4, UTF-16, UTF-16BE, UTF-16LE, UTF-7. |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
65 UCS-2 supports only characters up to \U0000FFFF. |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
66 UTF-16 and variants support only characters up to \U0010FFFF. |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
67 UTF-7 is way too complex and not supported by glibc-2.1. |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
68 UCS-4 specification leaves doubts about endianness and byte order |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
69 mark. glibc currently interprets it as big endian without byte order |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
70 mark, but this is not backed by an RFC. |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
71 So we use UTF-8. It supports characters up to \U7FFFFFFF and is |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
72 unambiguously defined. */ |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
73 |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
/* Encode the Unicode character WC as UTF-8 into the buffer R, which must
   have room for up to 6 bytes (r[0..5]).
   Return the number of bytes written (1 to 6), or -1 when WC is larger
   than 0x7fffffff and therefore cannot be represented.  */
static int
utf8_wctomb (unsigned char *r, unsigned int wc)
{
  /* Marker bits of the first byte, indexed by the sequence length.
     Entry 0 is unused; a one-byte sequence carries no marker.  */
  static const unsigned char lead_mark[7] =
    { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
  int count;
  int i;

  if (wc > 0x7fffffff)
    return -1;

  /* Pick the shortest sequence whose payload bits can hold WC.  */
  if (wc < 0x80)
    count = 1;
  else if (wc < 0x800)
    count = 2;
  else if (wc < 0x10000)
    count = 3;
  else if (wc < 0x200000)
    count = 4;
  else if (wc < 0x4000000)
    count = 5;
  else
    count = 6;

  /* Emit the continuation bytes from last to first, six payload bits
     each; whatever remains of WC belongs in the lead byte.  */
  for (i = count - 1; i > 0; i--)
    {
      r[i] = 0x80 | (wc & 0x3f);
      wc >>= 6;
    }
  r[0] = lead_mark[count] | wc;

  return count;
}
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
109 |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
/* Name of the UTF-8 encoding, as compared against the result of
   locale_charset() and passed to iconv_open() in unicode_to_mb.
   Luckily, the encoding's name is platform independent.  */
#define UTF8_NAME "UTF-8"
2305 | 112 |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
113 /* Converts the Unicode character CODE to its multibyte representation |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
114 in the current locale and calls the SUCCESS callback on the resulting |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
115 byte sequence. If an error occurs, invokes the FAILURE callback instead, |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
116 passing it CODE and an English error string. |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
117 Returns whatever the callback returned. |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
118 Assumes that the locale doesn't change between two calls. */ |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
119 long |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
120 unicode_to_mb (unsigned int code, |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
121 long (*success) PARAMS ((const char *buf, size_t buflen, |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
122 void *callback_arg)), |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
123 long (*failure) PARAMS ((unsigned int code, const char *msg, |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
124 void *callback_arg)), |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
125 void *callback_arg) |
2305 | 126 { |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
127 static int initialized; |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
128 static int is_utf8; |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
129 #if HAVE_ICONV |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
130 static iconv_t utf8_to_local; |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
131 #endif |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
132 |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
133 char inbuf[6]; |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
134 int count; |
2305 | 135 |
136 if (!initialized) | |
137 { | |
2650
ba5bbff44a5b
(print_unicode_char): Work around ansi2knr deficiency.
Jim Meyering <jim@meyering.net>
parents:
2412
diff
changeset
|
138 extern const char *locale_charset PARAMS ((void)); |
2305 | 139 const char *charset = locale_charset (); |
140 | |
3202
283d780ab032
(print_unicode_char): Simplify accordingly.
Jim Meyering <jim@meyering.net>
parents:
3123
diff
changeset
|
141 is_utf8 = !strcmp (charset, UTF8_NAME); |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
142 #if HAVE_ICONV |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
143 if (!is_utf8) |
2305 | 144 { |
3202
283d780ab032
(print_unicode_char): Simplify accordingly.
Jim Meyering <jim@meyering.net>
parents:
3123
diff
changeset
|
145 utf8_to_local = iconv_open (charset, UTF8_NAME); |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
146 if (utf8_to_local == (iconv_t)(-1)) |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
147 /* For an unknown encoding, assume ASCII. */ |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
148 utf8_to_local = iconv_open ("ASCII", UTF8_NAME); |
2305 | 149 } |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
150 #endif |
2305 | 151 initialized = 1; |
152 } | |
153 | |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
154 /* Test whether the utf8_to_local converter is available at all. */ |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
155 if (!is_utf8) |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
156 { |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
157 #if HAVE_ICONV |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
158 if (utf8_to_local == (iconv_t)(-1)) |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
159 return failure (code, N_("iconv function not usable"), callback_arg); |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
160 #else |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
161 return failure (code, N_("iconv function not available"), callback_arg); |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
162 #endif |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
163 } |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
164 |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
165 /* Convert the character to UTF-8. */ |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
166 count = utf8_wctomb ((unsigned char *) inbuf, code); |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
167 if (count < 0) |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
168 return failure (code, N_("character out of range"), callback_arg); |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
169 |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
170 #if HAVE_ICONV |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
171 if (!is_utf8) |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
172 { |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
173 char outbuf[25]; |
3123
f65bb2158462
(print_unicode_char): Cast the second iconv() arg,
Jim Meyering <jim@meyering.net>
parents:
3090
diff
changeset
|
174 const char *inptr; |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
175 size_t inbytesleft; |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
176 char *outptr; |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
177 size_t outbytesleft; |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
178 size_t res; |
2305 | 179 |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
180 inptr = inbuf; |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
181 inbytesleft = count; |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
182 outptr = outbuf; |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
183 outbytesleft = sizeof (outbuf); |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
184 |
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
185 /* Convert the character from UTF-8 to the locale's charset. */ |
3123
f65bb2158462
(print_unicode_char): Cast the second iconv() arg,
Jim Meyering <jim@meyering.net>
parents:
3090
diff
changeset
|
186 res = iconv (utf8_to_local, |
f65bb2158462
(print_unicode_char): Cast the second iconv() arg,
Jim Meyering <jim@meyering.net>
parents:
3090
diff
changeset
|
187 (ICONV_CONST char **)&inptr, &inbytesleft, |
f65bb2158462
(print_unicode_char): Cast the second iconv() arg,
Jim Meyering <jim@meyering.net>
parents:
3090
diff
changeset
|
188 &outptr, &outbytesleft); |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
189 if (inbytesleft > 0 || res == (size_t)(-1) |
2412
00595b9008f3
(print_unicode_char): Avoid triggering Solaris iconv bug.
Jim Meyering <jim@meyering.net>
parents:
2352
diff
changeset
|
190 /* Irix iconv() inserts a NUL byte if it cannot convert. */ |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
191 # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) |
2412
00595b9008f3
(print_unicode_char): Avoid triggering Solaris iconv bug.
Jim Meyering <jim@meyering.net>
parents:
2352
diff
changeset
|
192 || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
193 # endif |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
194 ) |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
195 return failure (code, NULL, callback_arg); |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
196 |
2412
00595b9008f3
(print_unicode_char): Avoid triggering Solaris iconv bug.
Jim Meyering <jim@meyering.net>
parents:
2352
diff
changeset
|
197 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
198 # if defined _LIBICONV_VERSION \ |
2412
00595b9008f3
(print_unicode_char): Avoid triggering Solaris iconv bug.
Jim Meyering <jim@meyering.net>
parents:
2352
diff
changeset
|
199 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) |
2305 | 200 |
2351
f0f61b4d7d83
(utf8_wctomb): New function.
Jim Meyering <jim@meyering.net>
parents:
2329
diff
changeset
|
201 /* Get back to the initial shift state. */ |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
202 res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); |
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
203 if (res == (size_t)(-1)) |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
204 return failure (code, NULL, callback_arg); |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
205 # endif |
2305 | 206 |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
207 return success (outbuf, outptr - outbuf, callback_arg); |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
208 } |
2983
4dbece6c259b
Back out Paul's changes while we wait for him and Bruno to reach agreement.
Jim Meyering <jim@meyering.net>
parents:
2977
diff
changeset
|
209 #endif |
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
210 |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
211 /* At this point, is_utf8 is true, so no conversion is needed. */ |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
212 return success (inbuf, count, callback_arg); |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
213 } |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
214 |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
/* Success callback for unicode_to_mb that writes the BUFLEN converted
   bytes at BUF to an output stream.  CALLBACK_ARG must be the FILE * to
   write to.  Always returns 0; the result of fwrite is not examined.  */
long
fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg)
{
  fwrite (buf, 1, buflen, (FILE *) callback_arg);
  return 0;
}
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
225 |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
/* Failure callback for unicode_to_mb that reports the unconvertible
   character CODE through error() with exit status 1.  MSG, when not NULL,
   is an untranslated explanation; it is translated with gettext() and
   appended to the message.  CALLBACK_ARG is not used.  */
static long
exit_failure_callback (unsigned int code, const char *msg, void *callback_arg)
{
  if (msg != NULL)
    error (1, 0, _("cannot convert U+%04X to local character set: %s"), code,
           gettext (msg));
  else
    error (1, 0, _("cannot convert U+%04X to local character set"), code);
  return -1;
}
3727
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
237 |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
/* Failure callback for unicode_to_mb that prints a plain ASCII stand-in
   for the character CODE, in the notation of ISO C99 universal character
   names: \uXXXX for codes below 0x10000, \UXXXXXXXX otherwise.
   CALLBACK_ARG must be the FILE * to write to.  MSG is not used.
   Always returns -1.  */
static long
fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg)
{
  FILE *out = (FILE *) callback_arg;

  if (code >= 0x10000)
    fprintf (out, "\\U%08X", code);
  else
    fprintf (out, "\\u%04X", code);
  return -1;
}
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
251 |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
252 /* Outputs the Unicode character CODE to the output stream STREAM. |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
253 Upon failure, exit if exit_on_error is true, otherwise output a fallback |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
254 notation. */ |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
255 void |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
256 print_unicode_char (FILE *stream, unsigned int code, int exit_on_error) |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
257 { |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
258 unicode_to_mb (code, fwrite_success_callback, |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
259 exit_on_error |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
260 ? exit_failure_callback |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
261 : fallback_failure_callback, |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
262 stream); |
f80e972daf0f
2002-02-02 Paul Eggert <eggert@twinsun.com>
Jim Meyering <jim@meyering.net>
parents:
3202
diff
changeset
|
263 } |