view lib/unistr/u-strtok.h @ 18042:6ff81241d5b7

unistr/uN-strtok: handle multibyte delimiters Previously, uN_strtok moved PTR to the unit immediately following the end of the token. When DELIM contained a multibyte character, the new position could be in the middle of a multibyte character. * lib/unistr/u-strtok.h (FUNC): Place PTR at the next character after the token. * lib/unistr/u8-strtok.c (U_STRMBLEN): New macro. * lib/unistr/u16-strtok.c (U_STRMBLEN): New macro. * lib/unistr/u32-strtok.c (U_STRMBLEN): New macro. * modules/unistr/u8-strtok (Depends-on): Depend on unistr/u8-strmblen. * modules/unistr/u16-strtok (Depends-on): Depend on unistr/u16-strmblen. * modules/unistr/u32-strtok (Depends-on): Depend on unistr/u32-strmblen. * tests/unistr/test-u-strtok.h: New file. * tests/unistr/test-u8-strtok.c: New file. * tests/unistr/test-u16-strtok.c: New file. * tests/unistr/test-u32-strtok.c: New file. * modules/unistr/u8-strtok-tests: New file. * modules/unistr/u32-strtok-tests: New file. * modules/unistr/u16-strtok-tests: New file. Copyright-paperwork-exempt: yes Co-authored-by: Daiki Ueno <ueno@gnu.org>
author Seiya Kawashima <skawashima@uchicago.edu>
date Fri, 03 Jul 2015 11:42:43 +0900
parents ab58d4870664
children
line wrap: on
line source

/* Tokenize UTF-8/UTF-16/UTF-32 string.
   Copyright (C) 1999, 2002, 2006, 2009-2015 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2002.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Extract the next token from a NUL-terminated string of UNITs.

   STR   is the string to start tokenizing, or NULL to resume from the
         position previously saved in *PTR.
   DELIM is the NUL-terminated set of delimiter characters.
   PTR   receives the position at which the following call should resume,
         or NULL once no further token can follow.

   Returns a pointer to the start of the token, which is NUL-terminated in
   place (the string is modified), or NULL if no token remains.  */
UNIT *
FUNC (UNIT *str, const UNIT *delim, UNIT **ptr)
{
  UNIT *token;
  UNIT *delim_pos;

  /* Pick the starting point: the caller's string, or the saved position
     when resuming.  A NULL saved position means a previous call already
     ran out of tokens.  */
  token = (str != NULL ? str : *ptr);
  if (token == NULL)
    return NULL;

  /* Step over any delimiters in front of the token.  */
  token += U_STRSPN (token, delim);

  /* Only delimiters (or nothing) were left: there is no token.  */
  if (*token == 0)
    {
      *ptr = NULL;
      return NULL;
    }

  /* Locate the first delimiter that follows the token.  */
  delim_pos = U_STRPBRK (token, delim);
  if (delim_pos == NULL)
    {
      /* The token extends to the end of the string; nothing follows it.  */
      *ptr = NULL;
      return token;
    }

  /* Resume after the whole delimiter character, which may span several
     units.  Its length must be measured before the NUL below overwrites
     its first unit.  */
  *ptr = delim_pos + U_STRMBLEN (delim_pos);

  /* Terminate the token where the delimiter began.  */
  *delim_pos = 0;

  return token;
}