Mercurial > hg > octave-shane > gnulib-hg
changeset 6057:a878a8d58823
Make strstr() work in multibyte locales.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Wed, 17 Aug 2005 14:05:33 +0000 |
parents | 4838606fdc03 |
children | a7440145d6a9 |
files | ChangeLog lib/ChangeLog lib/strstr.c lib/strstr.h m4/ChangeLog m4/strstr.m4 modules/strstr |
diffstat | 7 files changed, 130 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2005-08-17 Bruno Haible <bruno@clisp.org>
+
+	* modules/strstr (Files): Add m4/mbrtowc.m4.
+	(Depends-on): Add mbuiter.
+
 2005-08-17 Bruno Haible <bruno@clisp.org>
 
 	* modules/strcase (Depends-on): Add mbuiter. Remove strnlen1, mbchar.
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,8 @@
+2005-08-17 Bruno Haible <bruno@clisp.org>
+
+	* strstr.h: Ignore HAVE_STRSTR, always declare the gnulib function.
+	* strstr.c: Completely rewritten, with multibyte locale support.
+
 2005-08-17 Bruno Haible <bruno@clisp.org>
 
 	* strcasecmp.c: Use mbuiter.h.
--- a/lib/strstr.c +++ b/lib/strstr.c @@ -1,119 +1,126 @@ -/* Copyright (C) 1994, 1999, 2002-2003 Free Software Foundation, Inc. -This file is part of the GNU C Library. +/* Searching in a string. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2005. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -/* - * My personal strstr() implementation that beats most other algorithms. - * Until someone tells me otherwise, I assume that this is the - * fastest implementation of strstr() in C. - * I deliberately chose not to comment it. You should have at least - * as much fun trying to understand it, as I had to write it :-). - * - * Stephen R. 
van den Berg, berg@pool.informatik.rwth-aachen.de */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #if HAVE_CONFIG_H # include <config.h> #endif -#include <string.h> - -typedef unsigned chartype; +/* Specification. */ +#include "strstr.h" -#undef strstr +#if HAVE_MBRTOWC +# include "mbuiter.h" +#endif +/* Find the first occurrence of NEEDLE in HAYSTACK. */ char * -strstr (const char *phaystack, const char *pneedle) +strstr (const char *haystack, const char *needle) { - register const unsigned char *haystack, *needle; - register chartype b, c; - - haystack = (const unsigned char *) phaystack; - needle = (const unsigned char *) pneedle; - - b = *needle; - if (b != '\0') + /* Be careful not to look at the entire extent of haystack or needle + until needed. This is useful because of these two cases: + - haystack may be very long, and a match of needle found early, + - needle may be very long, and not even a short initial segment of + needle may be found in haystack. */ +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) { - haystack--; /* possible ANSI violation */ - do - { - c = *++haystack; - if (c == '\0') - goto ret0; - } - while (c != b); + mbui_iterator_t iter_needle; - c = *++needle; - if (c == '\0') - goto foundneedle; - ++needle; - goto jin; + mbui_init (iter_needle, needle); + if (mbui_avail (iter_needle)) + { + mbui_iterator_t iter_haystack; - for (;;) - { - register chartype a; - register const unsigned char *rhaystack, *rneedle; - - do + mbui_init (iter_haystack, haystack); + for (;; mbui_advance (iter_haystack)) { - a = *++haystack; - if (a == '\0') - goto ret0; - if (a == b) - break; - a = *++haystack; - if (a == '\0') - goto ret0; -shloop:; } - while (a != b); + if (!mbui_avail (iter_haystack)) + /* No match. 
*/ + return NULL; -jin: a = *++haystack; - if (a == '\0') - goto ret0; + if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle))) + /* The first character matches. */ + { + mbui_iterator_t rhaystack; + mbui_iterator_t rneedle; - if (a != c) - goto shloop; + memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t)); + mbui_advance (rhaystack); - rhaystack = haystack-- + 1; - rneedle = needle; - a = *rneedle; + mbui_init (rneedle, needle); + if (!mbui_avail (rneedle)) + abort (); + mbui_advance (rneedle); - if (*rhaystack == a) - do - { - if (a == '\0') - goto foundneedle; - ++rhaystack; - a = *++needle; - if (*rhaystack != a) - break; - if (a == '\0') - goto foundneedle; - ++rhaystack; - a = *++needle; - } - while (*rhaystack == a); + for (;; mbui_advance (rhaystack), mbui_advance (rneedle)) + { + if (!mbui_avail (rneedle)) + /* Found a match. */ + return (char *) haystack; + if (!mbui_avail (rhaystack)) + /* No match. */ + return NULL; + if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle))) + /* Nothing in this round. */ + break; + } + } + } + } + else + return (char *) haystack; + } + else +#endif + { + if (*needle != '\0') + { + /* Speed up the following searches of needle by caching its first + character. */ + char b = *needle++; - needle = rneedle; /* took the register-poor approach */ + for (;; haystack++) + { + if (*haystack == '\0') + /* No match. */ + return NULL; + if (*haystack == b) + /* The first character matches. */ + { + const char *rhaystack = haystack + 1; + const char *rneedle = needle; - if (a == '\0') - break; - } + for (;; rhaystack++, rneedle++) + { + if (*rneedle == '\0') + /* Found a match. */ + return (char *) haystack; + if (*rhaystack == '\0') + /* No match. */ + return NULL; + if (*rhaystack != *rneedle) + /* Nothing in this round. */ + break; + } + } + } + } + else + return (char *) haystack; } -foundneedle: - return (char*) haystack; -ret0: - return 0; }
--- a/lib/strstr.h
+++ b/lib/strstr.h
@@ -1,5 +1,5 @@
 /* Searching in a string.
-   Copyright (C) 2001-2003 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2005 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -15,13 +15,6 @@
    along with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
 
-#if HAVE_STRSTR
-
-/* Get strstr() declaration. */
-#include <string.h>
-
-#else
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -32,5 +25,3 @@
 #ifdef __cplusplus
 }
 #endif
-
-#endif
--- a/m4/ChangeLog
+++ b/m4/ChangeLog
@@ -1,3 +1,8 @@
+2005-08-17 Bruno Haible <bruno@clisp.org>
+
+	* strstr.m4 (gl_FUNC_STRSTR): Use the replacement function always.
+	(gl_PREREQ_STRSTR): Use gl_FUNC_MBRTOWC.
+
 2005-08-16 Paul Eggert <eggert@cs.ucla.edu>
 
 	* getopt.m4 (gl_GETOPT_CHECK_HEADERS): Do not override the results
--- a/m4/strstr.m4
+++ b/m4/strstr.m4
@@ -1,16 +1,19 @@
-# strstr.m4 serial 2
-dnl Copyright (C) 2002-2003 Free Software Foundation, Inc.
+# strstr.m4 serial 3
+dnl Copyright (C) 2002-2003, 2005 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
 dnl with or without modifications, as long as this notice is preserved.
 
 AC_DEFUN([gl_FUNC_STRSTR],
 [
-  AC_REPLACE_FUNCS(strstr)
-  if test $ac_cv_func_strstr = no; then
-    gl_PREREQ_STRSTR
-  fi
+  dnl No known system has a strstr() function that works correctly in
+  dnl multibyte locales. Therefore we use our version always.
+  AC_LIBOBJ(strstr)
+  AC_DEFINE(strstr, rpl_strstr, [Define to rpl_strstr always.])
+  gl_PREREQ_STRSTR
 ])
 
 # Prerequisites of lib/strstr.c.
-AC_DEFUN([gl_PREREQ_STRSTR], [:])
+AC_DEFUN([gl_PREREQ_STRSTR], [
+  gl_FUNC_MBRTOWC
+])