Mercurial > hg > octave-kai > gnulib-hg
changeset 9075:4d262fc1845c
Work around MacOS X wcwidth(0x0301) bug.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Sat, 07 Jul 2007 21:38:16 +0000 |
parents | 3948e119dd4f |
children | 93f79b5d3cc2 |
files | ChangeLog doc/functions/wcwidth.texi lib/wcwidth.c m4/wcwidth.m4 modules/wcwidth |
diffstat | 5 files changed, 80 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2007-07-07 Bruno Haible <bruno@clisp.org>
+
+	Work around MacOS X wcwidth() bug.
+	* m4/wcwidth.m4 (gl_FUNC_WCWIDTH): Test against MacOS X 10.3 bug.
+	* lib/wcwidth.c: Include localcharset.h, streq.h, uniwidth.h.
+	(rpl_wcwidth): Special-case the UTF-8 locales. Fall back to the
+	original wcwidth in non-UTF-8 locales.
+	* modules/wcwidth (Depends-on): Add localcharset, streq,
+	uniwidth/width.
+	* doc/functions/wcwidth.texi: Update.
+
 2007-07-07 Bruno Haible <bruno@clisp.org>
 
 	* lib/wchar_.h: Include the GL_LINK_WARNING macro.
--- a/doc/functions/wcwidth.texi
+++ b/doc/functions/wcwidth.texi
@@ -11,15 +11,15 @@
 @item
 This function is missing on some platforms:
 Solaris 2.5.1, mingw, BeOS.
+@item
+This function handles combining characters in UTF-8 locales incorrectly on some
+platforms:
+MacOS X 10.3.
 @end itemize
 
 Portability problems not fixed by Gnulib:
 @itemize
 @item
-This function handles combining characters in UTF-8 locales incorrectly on some
-platforms:
-MacOS X 10.3.
-@item
 On Windows platforms, @code{wchar_t} is a 16-bit type and therefore cannot
 accommodate all Unicode characters.
 @end itemize
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -23,8 +23,30 @@
 /* Get iswprint. */
 #include <wctype.h>
 
+#include "localcharset.h"
+#include "streq.h"
+#include "uniwidth.h"
+
+#undef wcwidth
+
 int
 rpl_wcwidth (wchar_t wc)
 {
-  return wc == 0 ? 0 : iswprint (wc) ? 1 : -1;
+  /* In UTF-8 locales, use a Unicode aware width function. */
+  const char *encoding = locale_charset ();
+  if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
+    {
+      /* We assume that in a UTF-8 locale, a wide character is the same as a
+         Unicode character. */
+      return uc_width (wc, encoding);
+    }
+  else
+    {
+      /* Otherwise, fall back to the system's wcwidth function. */
+#if HAVE_WCWIDTH
+      return wcwidth (wc);
+#else
+      return wc == 0 ? 0 : iswprint (wc) ? 1 : -1;
+#endif
+    }
 }
--- a/m4/wcwidth.m4
+++ b/m4/wcwidth.m4
@@ -1,4 +1,4 @@
-# wcwidth.m4 serial 10
+# wcwidth.m4 serial 11
 dnl Copyright (C) 2006, 2007 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -35,6 +35,44 @@
 
   if test $ac_cv_func_wcwidth = no; then
     REPLACE_WCWIDTH=1
+  else
+    dnl On MacOS X 10.3, wcwidth(0x0301) (COMBINING ACUTE ACCENT) returns 1.
+    dnl This leads to bugs in 'ls' (coreutils).
+    AC_CACHE_CHECK([whether wcwidth works reasonably in UTF-8 locales],
+      [gl_cv_func_wcwidth_works],
+      [
+        AC_TRY_RUN([
+#include <locale.h>
+/* AIX 3.2.5 declares wcwidth in <string.h>. */
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+   <wchar.h>.
+   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be included
+   before <wchar.h>. */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+#if !HAVE_DECL_WCWIDTH
+extern
+# ifdef __cplusplus
+"C"
+# endif
+int wcwidth (int);
+#endif
+int main ()
+{
+  if (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL)
+    if (wcwidth (0x0301) > 0)
+      return 1;
+  return 0;
+}], [gl_cv_func_wcwidth_works=yes], [gl_cv_func_wcwidth_works=no],
+          [gl_cv_func_wcwidth_works="guessing no"])
+      ])
+    case "$gl_cv_func_wcwidth_works" in
+      *yes) ;;
+      *no) REPLACE_WCWIDTH=1 ;;
+    esac
   fi
   if test $REPLACE_WCWIDTH = 1; then
     AC_LIBOBJ([wcwidth])