annotate lib/strcasestr.c @ 9623:69d9307c0aa0

Convert strcasestr module to use Two-Way algorithm. * modules/strcasestr-simple: New module, based on the old strcasestr, but with Two-Way rather than KMP. * modules/strcasestr (Depends-on): Change to strcasestr-simple. * lib/string.in.h (rpl_strcasestr): Declare. * m4/strcasestr.m4 (gl_FUNC_STRCASESTR): Check for linear performance. * lib/strcasestr.c (strcasestr): Simplify, and avoid malloc. * modules/string (Makefile.am): Support strcasestr. * m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Likewise. * modules/strcasestr-tests (Depends-on): Check for alarm. * tests/test-strcasestr.c: Augment test. * lib/str-two-way.h: Clean up stray macro. * NEWS: Document new module. * MODULES.html.sh (string handling): Likewise. * doc/functions/strcasestr.texi: New file. * doc/gnulib.texi (Function Substitutes): New node. Move memmem here, since it is not a POSIX function. Signed-off-by: Eric Blake <ebb9@byu.net>
author Eric Blake <ebb9@byu.net>
date Thu, 10 Jan 2008 22:22:51 -0700
parents d722bd5e44bd
children e8d2c6fc33ad
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
1 /* Case-insensitive searching in a string.
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
2 Copyright (C) 2005-2008 Free Software Foundation, Inc.
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
3 Written by Bruno Haible <bruno@clisp.org>, 2005.
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
4
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
5 This program is free software; you can redistribute it and/or modify
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
6 it under the terms of the GNU General Public License as published by
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
7 the Free Software Foundation; either version 2, or (at your option)
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
8 any later version.
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
9
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
10 This program is distributed in the hope that it will be useful,
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
13 GNU General Public License for more details.
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
14
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
15 You should have received a copy of the GNU General Public License
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
16 along with this program; if not, write to the Free Software Foundation,
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
18
7304
1c4ed7637c24 Include <config.h> unconditionally.
Bruno Haible <bruno@clisp.org>
parents: 6164
diff changeset
19 #include <config.h>
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
20
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
21 /* Specification. */
7980
89b6dfd07076 Declare strcasestr() in the <string.h> replacement, rather than in
Bruno Haible <bruno@clisp.org>
parents: 7304
diff changeset
22 #include <string.h>
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
23
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
24 #include <ctype.h>
8125
0545abcdfb09 Ensure O(n) worst-case complexity of strcasestr substitute.
Bruno Haible <bruno@clisp.org>
parents: 8098
diff changeset
25 #include <stdbool.h>
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
26 #include <strings.h>
8145
e8d8167b9164 Optimize memory allocation to use alloca when possible.
Bruno Haible <bruno@clisp.org>
parents: 8125
diff changeset
27
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
28 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
29
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
30 /* Two-Way algorithm. */
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
31 #define RETURN_TYPE char *
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
32 #define AVAILABLE(h, h_l, j, n_l) \
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
33 (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
34 && ((h_l) = (j) + (n_l)))
9561
d722bd5e44bd Unify 5 copies of the KMP code.
Bruno Haible <bruno@clisp.org>
parents: 9558
diff changeset
35 #define CANON_ELEMENT(c) TOLOWER (c)
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
36 #define CMP_FUNC(p1, p2, l) \
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
37 strncasecmp ((const char *) (p1), (const char *) (p2), l)
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
38 #include "str-two-way.h"
8125
0545abcdfb09 Ensure O(n) worst-case complexity of strcasestr substitute.
Bruno Haible <bruno@clisp.org>
parents: 8098
diff changeset
39
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
40 /* Find the first occurrence of NEEDLE in HAYSTACK, using
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
41 case-insensitive comparison. This function gives unspecified
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
42 results in multibyte locales. */
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
43 char *
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
44 strcasestr (const char *haystack_start, const char *needle_start)
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
45 {
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
46 const char *haystack = haystack_start;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
47 const char *needle = needle_start;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
48 size_t needle_len; /* Length of NEEDLE. */
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
49 size_t haystack_len; /* Known minimum length of HAYSTACK. */
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
50 bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
8125
0545abcdfb09 Ensure O(n) worst-case complexity of strcasestr substitute.
Bruno Haible <bruno@clisp.org>
parents: 8098
diff changeset
51
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
52 /* Determine length of NEEDLE, and in the process, make sure
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
53 HAYSTACK is at least as long (no point processing all of a long
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
54 NEEDLE if HAYSTACK is too short). */
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
55 while (*haystack && *needle)
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
56 {
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
57 ok &= (TOLOWER ((unsigned char) *haystack)
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
58 == TOLOWER ((unsigned char) *needle));
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
59 haystack++;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
60 needle++;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
61 }
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
62 if (*needle)
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
63 return NULL;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
64 if (ok)
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
65 return (char *) haystack_start;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
66 needle_len = needle - needle_start;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
67 haystack = haystack_start + 1;
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
68 haystack_len = needle_len - 1;
8125
0545abcdfb09 Ensure O(n) worst-case complexity of strcasestr substitute.
Bruno Haible <bruno@clisp.org>
parents: 8098
diff changeset
69
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
70 /* Perform the search. Abstract memory is considered to be an array
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
71 of 'unsigned char' values, not an array of 'char' values. See
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
72 ISO C 99 section 6.2.6.1. */
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
73 if (needle_len < LONG_NEEDLE_THRESHOLD)
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
74 return two_way_short_needle ((const unsigned char *) haystack,
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
75 haystack_len,
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
76 (const unsigned char *) needle_start,
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
77 needle_len);
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
78 return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
79 (const unsigned char *) needle_start,
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
80 needle_len);
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
81 }
6058
a7440145d6a9 New module 'strcasestr'.
Bruno Haible <bruno@clisp.org>
parents:
diff changeset
82
9623
69d9307c0aa0 Convert strcasestr module to use Two-Way algorithm.
Eric Blake <ebb9@byu.net>
parents: 9561
diff changeset
83 #undef LONG_NEEDLE_THRESHOLD