view lib/memchr2.c @ 16366:bb182ee4a09d

maint: replace FSF snail-mail addresses with URLs * config/argz.mk, lib/accept4.c, lib/alignof.h, lib/alloca.in.h: * lib/alphasort.c, lib/arcfour.c, lib/arcfour.h, lib/arctwo.c: * lib/arctwo.h, lib/argz.c, lib/arpa_inet.in.h, lib/asnprintf.c: * lib/asprintf.c, lib/assert.in.h, lib/base32.c, lib/base32.h: * lib/base64.c, lib/base64.h, lib/c-ctype.c, lib/c-ctype.h: * lib/c-strcase.h, lib/c-strcasecmp.c, lib/c-strncasecmp.c: * lib/check-version.c, lib/check-version.h, lib/config.charset: * lib/ctype.in.h, lib/des.c, lib/des.h, lib/dup3.c, lib/errno.in.h: * lib/float+.h, lib/fnmatch.c, lib/fnmatch.in.h, lib/fnmatch_loop.c: * lib/fseeko.c, lib/gai_strerror.c, lib/gc-gnulib.c: * lib/gc-libgcrypt.c, lib/gc-pbkdf2-sha1.c, lib/gc.h: * lib/getaddrinfo.c, lib/getdelim.c, lib/getfilecon.c, lib/getline.c: * lib/getlogin_r.c, lib/getpass.c, lib/getpass.h, lib/gettext.h: * lib/gettimeofday.c, lib/glob.in.h, lib/glthread/cond.c: * lib/glthread/cond.h, lib/glthread/lock.c, lib/glthread/lock.h: * lib/glthread/thread.c, lib/glthread/thread.h: * lib/glthread/threadlib.c, lib/glthread/yield.h, lib/hmac-md5.c: * lib/hmac-sha1.c, lib/hmac.h, lib/iconv.c, lib/iconv.in.h: * lib/iconv_close.c, lib/iconv_open.c, lib/inet_ntop.c, lib/isfinite.c: * lib/isinf.c, lib/iswblank.c, lib/langinfo.in.h, lib/link.c: * lib/localcharset.c, lib/localcharset.h, lib/lseek.c, lib/malloc.c: * lib/malloca.c, lib/malloca.h, lib/md2.c, lib/md2.h, lib/md4.c: * lib/md4.h, lib/md5.c, lib/md5.h, lib/memmem.c, lib/mempcpy.c: * lib/memset.c, lib/memxor.c, lib/memxor.h, lib/minmax.h, lib/mktime.c: * lib/msvc-inval.c, lib/msvc-inval.h, lib/msvc-nothrow.c: * lib/msvc-nothrow.h, lib/netdb.in.h, lib/netinet_in.in.h, lib/nproc.c: * lib/nproc.h, lib/obstack_printf.c, lib/pathmax.h, lib/pipe.c: * lib/pipe2.c, lib/poll.c, lib/poll.in.h, lib/printf-args.c: * lib/printf-args.h, lib/printf-parse.c, lib/printf-parse.h: * lib/pselect.c, lib/pthread.in.h, lib/pty-private.h, lib/pty.in.h: * lib/read-file.c, lib/read-file.h, lib/ref-add.sin, lib/ref-del.sin: * lib/regcomp.c, lib/regex.c, lib/regex.h, lib/regex_internal.c: * lib/regex_internal.h, lib/regexec.c, lib/rijndael-alg-fst.c: * lib/rijndael-alg-fst.h, lib/rijndael-api-fst.c: * lib/rijndael-api-fst.h, lib/rint.c, lib/rintf.c, lib/rintl.c: * lib/round.c, lib/roundf.c, lib/roundl.c, lib/scandir.c, lib/select.c: * lib/sha1.c, lib/sha1.h, lib/size_max.h, lib/snprintf.c: * lib/stdalign.in.h, lib/stdarg.in.h, lib/stdbool.in.h: * lib/stddef.in.h, lib/stdint.in.h, lib/stdio.in.h, lib/str-kmp.h: * lib/str-two-way.h, lib/strcasecmp.c, lib/strcasestr.c, lib/strdup.c: * lib/striconv.c, lib/striconv.h, lib/string.in.h, lib/strings.in.h: * lib/strncasecmp.c, lib/strndup.c, lib/strnlen.c, lib/strpbrk.c: * lib/strptime.c, lib/strsep.c, lib/strstr.c, lib/strverscmp.c: * lib/sys_file.in.h, lib/sys_ioctl.in.h, lib/sys_select.in.h: * lib/sys_socket.in.h, lib/sys_stat.in.h, lib/sys_time.in.h: * lib/sys_times.in.h, lib/sys_types.in.h, lib/sys_uio.in.h: * lib/sys_utsname.in.h, lib/sys_wait.in.h, lib/tcgetsid.c: * lib/termios.in.h, lib/time.in.h, lib/time_r.c, lib/timegm.c: * lib/times.c, lib/unictype/3level.h, lib/unictype/3levelbit.h: * lib/unistd.in.h, lib/vasnprintf.c, lib/vasnprintf.h, lib/vasprintf.c: * lib/vsnprintf.c, lib/waitpid.c, lib/wchar.in.h, lib/wctype.in.h: * lib/xsize.h, tests/test-closein.c, tests/test-des.c: * tests/test-fclose.c, tests/test-fgetc.c, tests/test-filevercmp.c: * tests/test-fputc.c, tests/test-fread.c, tests/test-fwrite.c: * tests/test-gc-arcfour.c, tests/test-gc-arctwo.c, tests/test-gc-des.c: * tests/test-gc-hmac-md5.c, tests/test-gc-hmac-sha1.c: * tests/test-gc-md2.c, tests/test-gc-md4.c, tests/test-gc-md5.c: * tests/test-gc-pbkdf2-sha1.c, tests/test-gc-rijndael.c: * tests/test-gc-sha1.c, tests/test-gc.c, tests/test-getdelim.c: * tests/test-getline.c, tests/test-getndelim2.c, tests/test-md2.c: * tests/test-md4.c, tests/test-parse-datetime.c, tests/test-perror.c: * tests/test-perror2.c, tests/test-pipe.c, tests/test-pipe2.c: * tests/test-poll.c, tests/test-quotearg-simple.c: * tests/test-quotearg.c, tests/test-quotearg.h: * tests/test-round-ieee.c, tests/test-round1.c: * tests/test-roundf-ieee.c, tests/test-roundf1.c: * tests/test-roundl-ieee.c, tests/test-roundl.c: * tests/test-safe-alloc.c, tests/test-sigpipe.c: * tests/test-spawn-pipe-child.c, tests/test-spawn-pipe-main.c: * tests/test-strerror.c, tests/test-strerror_r.c: * tests/test-strsignal.c, tests/test-strverscmp.c: * tests/test-xmemdup0.c: Replace FSF snail mail addresses with URLs, as per GNU coding standards. See glibc bug <http://sourceware.org/bugzilla/show_bug.cgi?id=13673>.
author Paul Eggert <eggert@cs.ucla.edu>
date Thu, 09 Feb 2012 21:39:05 -0800 (2012-02-10)
parents a712776b11ce
children e542fd46ad6f
line wrap: on
line source
/* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2012
   Free Software Foundation, Inc.

   Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
   with help from Dan Sahlin (dan@sics.se) and
   commentary by Jim Blandy (jimb@ai.mit.edu);
   adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
   and implemented in glibc by Roland McGrath (roland@ai.mit.edu).
   Extension to memchr2 implemented by Eric Blake (ebb9@byu.net).

This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

#include "memchr2.h"

#include <limits.h>
#include <stdint.h>
#include <string.h>

/* Return the first address of either C1 or C2 (treated as unsigned
   char) that occurs within N bytes of the memory region S.  If
   neither byte appears, return NULL.  */
void *
memchr2 (void const *s, int c1_in, int c2_in, size_t n)
{
  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
     long instead of a 64-bit uintmax_t tends to give better
     performance.  On 64-bit hardware, unsigned long is generally 64
     bits already.  Change this typedef to experiment with
     performance.  */
  typedef unsigned long int longword;

  const unsigned char *char_ptr;
  const longword *longword_ptr;
  longword repeated_one;
  longword repeated_c1;
  longword repeated_c2;
  unsigned char c1;
  unsigned char c2;

  c1 = (unsigned char) c1_in;
  c2 = (unsigned char) c2_in;

  if (c1 == c2)
    return memchr (s, c1, n);

  /* Handle the first few bytes by reading one byte at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.  */
  for (char_ptr = (const unsigned char *) s;
       n > 0 && (size_t) char_ptr % sizeof (longword) != 0;
       --n, ++char_ptr)
    if (*char_ptr == c1 || *char_ptr == c2)
      return (void *) char_ptr;

  longword_ptr = (const longword *) char_ptr;

  /* All these elucidatory comments refer to 4-byte longwords,
     but the theory applies equally well to any size longwords.  */

  /* Compute auxiliary longword values:
     repeated_one is a value which has a 1 in every byte.
     repeated_c1 has c1 in every byte.
     repeated_c2 has c2 in every byte.  */
  repeated_one = 0x01010101;
  repeated_c1 = c1 | (c1 << 8);
  repeated_c2 = c2 | (c2 << 8);
  repeated_c1 |= repeated_c1 << 16;
  repeated_c2 |= repeated_c2 << 16;
  if (0xffffffffU < (longword) -1)
    {
      repeated_one |= repeated_one << 31 << 1;
      repeated_c1 |= repeated_c1 << 31 << 1;
      repeated_c2 |= repeated_c2 << 31 << 1;
      if (8 < sizeof (longword))
        {
          size_t i;

          for (i = 64; i < sizeof (longword) * 8; i *= 2)
            {
              repeated_one |= repeated_one << i;
              repeated_c1 |= repeated_c1 << i;
              repeated_c2 |= repeated_c2 << i;
            }
        }
    }

  /* Instead of the traditional loop which tests each byte, we will test a
     longword at a time.  The tricky part is testing if *any of the four*
     bytes in the longword in question are equal to c1 or c2.  We first use
     an xor with repeated_c1 and repeated_c2, respectively.  This reduces
     the task to testing whether *any of the four* bytes in longword1 or
     longword2 is zero.

     Let's consider longword1.  We compute tmp1 =
       ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
     That is, we perform the following operations:
       1. Subtract repeated_one.
       2. & ~longword1.
       3. & a mask consisting of 0x80 in every byte.
     Consider what happens in each byte:
       - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
         and step 3 transforms it into 0x80.  A carry can also be propagated
         to more significant bytes.
       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
         position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
         the byte ends in a single bit of value 0 and k bits of value 1.
         After step 2, the result is just k bits of value 1: 2^k - 1.  After
         step 3, the result is 0.  And no carry is produced.
     So, if longword1 has only non-zero bytes, tmp1 is zero.
     Whereas if longword1 has a zero byte, call j the position of the least
     significant zero byte.  Then the result has a zero at positions 0, ...,
     j-1 and a 0x80 at position j.  We cannot predict the result at the more
     significant bytes (positions j+1..3), but it does not matter since we
     already have a non-zero bit at position 8*j+7.

     Similarly, we compute tmp2 =
       ((longword2 - repeated_one) & ~longword2) & (repeated_one << 7).

     The test whether any byte in longword1 or longword2 is zero is equivalent
     to testing whether tmp1 is nonzero or tmp2 is nonzero.  We can combine
     this into a single test, whether (tmp1 | tmp2) is nonzero.  */

  while (n >= sizeof (longword))
    {
      longword longword1 = *longword_ptr ^ repeated_c1;
      longword longword2 = *longword_ptr ^ repeated_c2;

      if (((((longword1 - repeated_one) & ~longword1)
            | ((longword2 - repeated_one) & ~longword2))
           & (repeated_one << 7)) != 0)
        break;
      longword_ptr++;
      n -= sizeof (longword);
    }

  char_ptr = (const unsigned char *) longword_ptr;

  /* At this point, we know that either n < sizeof (longword), or one of the
     sizeof (longword) bytes starting at char_ptr is == c1 or == c2.  On
     little-endian machines, we could determine the first such byte without
     any further memory accesses, just by looking at the (tmp1 | tmp2) result
     from the last loop iteration.  But this does not work on big-endian
     machines.  Choose code that works in both cases.  */

  for (; n > 0; --n, ++char_ptr)
    {
      if (*char_ptr == c1 || *char_ptr == c2)
        return (void *) char_ptr;
    }

  return NULL;
}