Mercurial > hg > octave-kai > gnulib-hg
diff lib/quotearg.c @ 2124:30abc8682bdf
Quote multibyte characters correctly.
(ISGRAPH): Remove.
(ISPRINT): New macro.
(<wchar.h>): Include if HAVE_MBRTOWC && HAVE_WCHAR_H.
(iswprint, mbrtowc, mbsinit, mbstate_t): New macros,
defined if ! (HAVE_MBRTOWC && HAVE_WCHAR_H).
(quotearg_buffer_restyled): New function, with most of the old
quotearg_buffer's contents.
Major rewrite to support multibyte characters.
(quotearg_buffer): Now just calls quotearg_buffer_restyled.
author | Jim Meyering <jim@meyering.net> |
---|---|
date | Sat, 15 Jan 2000 11:57:11 +0000 (2000-01-15) |
parents | 78f6058b59a6 |
children | d0a924014953 |
line wrap: on
line diff
--- a/lib/quotearg.c +++ b/lib/quotearg.c @@ -1,5 +1,5 @@ /* quotearg.c - quote arguments for output - Copyright (C) 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,6 @@ /* Written by Paul Eggert <eggert@twinsun.com> */ -/* FIXME: Multibyte characters are not supported yet. */ - #if HAVE_CONFIG_H # include <config.h> #endif @@ -33,11 +31,7 @@ #else # define ISASCII(c) isascii (c) #endif -#ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -#else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -#endif +#define ISPRINT(c) (ISASCII (c) && isprint (c)) #if ENABLE_NLS # include <libintl.h> @@ -64,6 +58,15 @@ # include <string.h> #endif +#if HAVE_MBRTOWC && HAVE_WCHAR_H +# include <wchar.h> +#else +# define iswprint(wc) 1 +# define mbrtowc(pwc, s, n, ps) 1 +# define mbsinit(ps) 1 +# define mbstate_t int +#endif + #define INT_BITS (sizeof (int) * CHAR_BIT) struct quoting_options @@ -71,7 +74,7 @@ /* Basic quoting style. */ enum quoting_style style; - /* Quote the chararacters indicated by this bit vector even if the + /* Quote the characters indicated by this bit vector even if the quoting style would not normally require them to be quoted. */ int quote_these_too[((UCHAR_MAX + 1) / INT_BITS + ((UCHAR_MAX + 1) % INT_BITS != 0))]; @@ -89,7 +92,7 @@ 0 }; -/* Correspondances to quoting style names. */ +/* Correspondences to quoting style names. */ enum quoting_style const quoting_style_vals[] = { literal_quoting_style, @@ -147,6 +150,292 @@ } /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of + argument ARG (of size ARGSIZE), using QUOTING_STYLE and the + non-quoting-style part of O to control quoting. 
+ Terminate the output with a null character, and return the written + size of the output, not counting the terminating null. + If BUFFERSIZE is too small to store the output string, return the + value that would have been returned had BUFFERSIZE been large enough. + If ARGSIZE is -1, use the string length of the argument for ARGSIZE. + + This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, + ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting + style specified by O, and O may not be null. */ + +static size_t +quotearg_buffer_restyled (char *buffer, size_t buffersize, + char const *arg, size_t argsize, + enum quoting_style quoting_style, + struct quoting_options const *o) +{ + size_t i; + size_t len = 0; + char const *quote_string = 0; + size_t quote_string_len = 0; + int backslash_escapes = 0; + +#define STORE(c) \ + do \ + { \ + if (len < buffersize) \ + buffer[len] = (c); \ + len++; \ + } \ + while (0) + + switch (quoting_style) + { + case c_quoting_style: + STORE ('"'); + backslash_escapes = 1; + quote_string = "\""; + quote_string_len = 1; + break; + + case escape_quoting_style: + backslash_escapes = 1; + break; + + case locale_quoting_style: + for (quote_string = _("`"); *quote_string; quote_string++) + STORE (*quote_string); + backslash_escapes = 1; + quote_string = _("'"); + quote_string_len = strlen (quote_string); + break; + + case shell_always_quoting_style: + STORE ('\''); + quote_string = "'"; + quote_string_len = 1; + break; + + default: + break; + } + + for (i = 0; ! (argsize == (size_t) -1 ? 
arg[i] == '\0' : i == argsize); i++) + { + unsigned char c; + unsigned char esc; + + if (backslash_escapes + && quote_string_len + && i + quote_string_len <= argsize + && memcmp (arg + i, quote_string, quote_string_len) == 0) + STORE ('\\'); + + c = arg[i]; + switch (c) + { + case '?': + switch (quoting_style) + { + case shell_quoting_style: + goto use_shell_always_quoting_style; + + case c_quoting_style: + if (i + 2 < argsize && arg[i + 1] == '?') + switch (arg[i + 2]) + { + case '!': case '\'': + case '(': case ')': case '-': case '/': + case '<': case '=': case '>': + /* Escape the second '?' in what would otherwise be + a trigraph. */ + i += 2; + c = arg[i + 2]; + STORE ('?'); + STORE ('\\'); + STORE ('?'); + break; + } + break; + + default: + break; + } + break; + +#if HAVE_C_BACKSLASH_A + case '\a': esc = 'a'; goto c_escape; +#endif + case '\b': esc = 'b'; goto c_escape; + case '\f': esc = 'f'; goto c_escape; + case '\n': esc = 'n'; goto c_escape; + case '\r': esc = 'r'; goto c_escape; + case '\t': esc = 't'; goto c_escape; + case '\v': esc = 'v'; goto c_escape; + case '\\': esc = c; goto c_escape; + + c_escape: + if (backslash_escapes) + { + c = esc; + goto store_escape; + } + if (quoting_style == shell_quoting_style) + goto use_shell_always_quoting_style; + break; + + case '#': case '~': + if (i != 0) + break; + /* Fall through. */ + case ' ': + case '!': /* special in bash */ + case '"': case '$': case '&': + case '(': case ')': case '*': case ';': + case '<': case '>': case '[': + case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ + case '`': case '|': + /* A shell special character. In theory, '$' and '`' could + be the first bytes of multibyte characters, which means + we should check them with mbrtowc, but in practice this + doesn't happen so it's not worth worrying about. 
*/ + if (quoting_style == shell_quoting_style) + goto use_shell_always_quoting_style; + break; + + case '\'': + switch (quoting_style) + { + case shell_quoting_style: + goto use_shell_always_quoting_style; + + case shell_always_quoting_style: + STORE ('\''); + STORE ('\\'); + STORE ('\''); + break; + + default: + break; + } + break; + + case '%': case '+': case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': case ':': case '=': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': + case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + case '{': case '}': + /* These characters don't cause problems, no matter what the + quoting style is. They cannot start multibyte sequences. */ + break; + + default: + /* If we have a multibyte sequence, copy it until we reach + its end, find an error, or come back to the initial shift + state. For C-like styles, if the sequence has + unprintable characters, escape the whole sequence, since + we can't easily escape single characters within it. */ + { + /* Length of multibyte sequence found so far. 
*/ + size_t m = 0; + + int printable = 1; + mbstate_t mbstate; + memset (&mbstate, 0, sizeof mbstate); + + if (argsize == (size_t) -1) + argsize = strlen (arg); + + do + { + wchar_t w; + size_t bytes = mbrtowc (&w, &arg[i + m], + argsize - (i + m), &mbstate); + if (bytes == 0) + break; + else if (bytes == (size_t) -1) + { + printable = 0; + break; + } + else if (bytes == (size_t) -2) + { + printable = 0; + while (i + m < argsize && arg[i + m]) + m++; + break; + } + else + { + if (! iswprint (w)) + printable = 0; + m += bytes; + } + } + while (! mbsinit (&mbstate)); + + if (m <= 1) + { + /* Escape a unibyte character like a multibyte + sequence if using backslash escapes, and if the + character is not printable. */ + m = backslash_escapes && ! ISPRINT (c); + printable = 0; + } + + if (m) + { + /* Output a multibyte sequence, or an escaped + unprintable unibyte character. */ + size_t imax = i + m - 1; + + for (;;) + { + if (backslash_escapes && ! printable) + { + STORE ('\\'); + STORE ('0' + (c >> 6)); + STORE ('0' + ((c >> 3) & 7)); + c = '0' + (c & 7); + } + if (i == imax) + break; + STORE (c); + c = arg[++i]; + } + + goto store_c; + } + } + } + + if (! (backslash_escapes + && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) + goto store_c; + + store_escape: + STORE ('\\'); + + store_c: + STORE (c); + } + + if (quote_string) + for (; *quote_string; quote_string++) + STORE (*quote_string); + + if (len < buffersize) + buffer[len] = '\0'; + return len; + + use_shell_always_quoting_style: + return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, + shell_always_quoting_style, o); +} + +/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of argument ARG (of size ARGSIZE), using O to control quoting. If O is null, use the default. 
Terminate the output with a null character, and return the written @@ -159,161 +448,9 @@ char const *arg, size_t argsize, struct quoting_options const *o) { - unsigned char c; - size_t i; - size_t len = 0; - char const *quote_string; - size_t quote_string_len; struct quoting_options const *p = o ? o : &default_quoting_options; - enum quoting_style quoting_style = p->style; -#define STORE(c) \ - do \ - { \ - if (len < buffersize) \ - buffer[len] = (c); \ - len++; \ - } \ - while (0) - - switch (quoting_style) - { - case shell_quoting_style: - if (! (argsize == (size_t) -1 ? arg[0] == '\0' : argsize == 0)) - { - switch (arg[0]) - { - case '#': case '~': - break; - - default: - for (i = 0; ; i++) - { - if (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize) - goto done; - - c = arg[i]; - - switch (c) - { - case '\t': case '\n': case ' ': - case '!': /* special in csh */ - case '"': case '$': case '&': case '\'': - case '(': case ')': case '*': case ';': - case '<': case '>': case '?': case '[': case '\\': - case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ - case '`': case '|': - goto needs_quoting; - } - - if (p->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))) - goto needs_quoting; - - STORE (c); - } - needs_quoting:; - - len = 0; - break; - } - } - /* Fall through. */ - - case shell_always_quoting_style: - STORE ('\''); - quote_string = "'"; - quote_string_len = 1; - break; - - case c_quoting_style: - STORE ('"'); - quote_string = "\""; - quote_string_len = 1; - break; - - case locale_quoting_style: - for (quote_string = _("`"); *quote_string; quote_string++) - STORE (*quote_string); - quote_string = _("'"); - quote_string_len = strlen (quote_string); - break; - - default: - quote_string = 0; - quote_string_len = 0; - break; - } - - for (i = 0; ! (argsize == (size_t) -1 ? 
arg[i] == '\0' : i == argsize); i++) - { - c = arg[i]; - - switch (quoting_style) - { - case literal_quoting_style: - break; - - case shell_quoting_style: - case shell_always_quoting_style: - if (c == '\'') - { - STORE ('\''); - STORE ('\\'); - STORE ('\''); - } - break; - - case c_quoting_style: - case escape_quoting_style: - case locale_quoting_style: - switch (c) - { - case '?': /* Do not generate trigraphs. */ - case '\\': goto store_escape; - /* Not all C compilers know what \a means. */ - case 7 : c = 'a'; goto store_escape; - case '\b': c = 'b'; goto store_escape; - case '\f': c = 'f'; goto store_escape; - case '\n': c = 'n'; goto store_escape; - case '\r': c = 'r'; goto store_escape; - case '\t': c = 't'; goto store_escape; - case '\v': c = 'v'; goto store_escape; - - case ' ': break; - - default: - if (quote_string_len - && strncmp (arg + i, quote_string, quote_string_len) == 0) - goto store_escape; - if (!ISGRAPH (c)) - { - STORE ('\\'); - STORE ('0' + (c >> 6)); - STORE ('0' + ((c >> 3) & 7)); - c = '0' + (c & 7); - goto store_c; - } - break; - } - - if (! (p->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) - goto store_c; - - store_escape: - STORE ('\\'); - } - - store_c: - STORE (c); - } - - if (quote_string) - for (; *quote_string; quote_string++) - STORE (*quote_string); - - done: - if (len < buffersize) - buffer[len] = '\0'; - return len; + return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, + p->style, p); } /* Use storage slot N to return a quoted version of the string ARG.