Mercurial > hg > octave-kai > gnulib-hg
annotate lib/memchr.c @ 6075:ea0e673b670d
Recent regex patches.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Sat, 20 Aug 2005 01:03:31 +0000 |
parents | 4cb1b544f4cc |
children | 8a1a9361108c |
rev | line source |
---|---|
5159 | 1 /* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004 Free |
4664 | 2 Software Foundation, Inc. |
3 | |
884 | 4 Based on strlen implementation by Torbjorn Granlund (tege@sics.se), |
14 | 5 with help from Dan Sahlin (dan@sics.se) and |
6 commentary by Jim Blandy (jimb@ai.mit.edu); | |
7 adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), | |
8 and implemented by Roland McGrath (roland@ai.mit.edu). | |
9 | |
394 | 10 NOTE: The canonical source of this file is maintained with the GNU C Library. |
11 Bugs can be reported to bug-glibc@prep.ai.mit.edu. | |
14 | 12 |
394 | 13 This program is free software; you can redistribute it and/or modify it |
14 under the terms of the GNU General Public License as published by the | |
15 Free Software Foundation; either version 2, or (at your option) any | |
16 later version. | |
17 | |
18 This program is distributed in the hope that it will be useful, | |
14 | 19 but WITHOUT ANY WARRANTY; without even the implied warranty of |
394 | 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 GNU General Public License for more details. | |
14 | 22 |
394 | 23 You should have received a copy of the GNU General Public License |
6065 | 24 along with this program; if not, write to the Free Software Foundation, |
25 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ | |
14 | 26 |
166 | 27 #ifdef HAVE_CONFIG_H |
1872 | 28 # include <config.h> |
166 | 29 #endif |
30 | |
4664 | 31 #include <string.h> |
394 | 32 |
5159 | 33 #include <stddef.h> |
34 | |
2932 | 35 #if defined _LIBC |
36 # include <memcopy.h> | |
37 #else | |
38 # define reg_char char | |
448 | 39 #endif |
40 | |
4664 | 41 #include <limits.h> |
394 | 42 |
2932 | 43 #if HAVE_BP_SYM_H || defined _LIBC |
44 # include <bp-sym.h> | |
45 #else | |
46 # define BP_SYM(sym) sym | |
47 #endif | |
394 | 48 |
2932 | 49 #undef memchr |
50 #undef __memchr | |
394 | 51 |
/* Search no more than N bytes of S for C.

   C_IN is converted to unsigned char and compared against the bytes of
   S read as unsigned char.  Returns a pointer to the first matching
   byte, or NULL if none of the first N bytes matches (including the
   case N == 0).  */
void *
__memchr (void const *s, int c_in, size_t n)
{
  const unsigned char *char_ptr;
  const unsigned long int *longword_ptr;
  unsigned long int longword, magic_bits, charmask;
  unsigned char c;
  size_t i;

  c = (unsigned char) c_in;

  /* Handle the first few characters by reading one character at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.  */
  for (char_ptr = (const unsigned char *) s;
       n > 0 && (size_t) char_ptr % sizeof longword != 0;
       --n, ++char_ptr)
    if (*char_ptr == c)
      return (void *) char_ptr;

  /* All these elucidatory comments refer to 4-byte longwords,
     but the theory applies equally well to any size longwords.  */

  /* NOTE(review): from here on the buffer is read through an
     `unsigned long int' lvalue.  Confirm that this file is built in a
     way that tolerates such type-punned reads.  */
  longword_ptr = (const unsigned long int *) char_ptr;

  /* Bits 31, 24, 16, and 8 of MAGIC_BITS are zero.  Call these bits
     the "holes."  Note that there is a hole just to the left of
     each byte, with an extra at the end:

     bits:  01111110 11111110 11111110 11111111
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD

     The 1-bits make sure that carries propagate to the next 0-bit.
     The 0-bits provide holes for carries to fall into.  */

  /* Set MAGIC_BITS to be this pattern of 1 and 0 bits.
     Set CHARMASK to be a longword, each of whose bytes is C.

     Both are built by repeated doubling, starting from a single byte.
     The arithmetic is done in `unsigned long int', and every shift
     count is smaller than the width of the longword, so no shift can
     overflow whatever the width of `int' or `long'.  */
  magic_bits = 0xfe;
  charmask = c;
  for (i = 8; i < sizeof longword * CHAR_BIT; i *= 2)
    {
      magic_bits |= magic_bits << i;
      charmask |= charmask << i;
    }
  /* Open the hole at the top bit and fill the one at bit 0.  */
  magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1);

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     if *any* of the bytes in the longword in question are zero.  */
  while (n >= sizeof longword)
    {
      /* We look closer at a longword if adding MAGIC_BITS to it fails
         to change any of its hole bits.

         1) Is this safe?  Will it catch all the zero bytes?
            Suppose there is a byte with all zeros.  Any carry bits
            propagating from its left will fall into the hole at its
            least significant bit and stop.  Since there will be no
            carry from its most significant bit, the LSB of the
            byte to the left will be unchanged, and the zero will be
            detected.

         2) Is this worthwhile?  Will it ignore everything except
            zero bytes?  Suppose every byte of LONGWORD has a bit set
            somewhere.  There will be a carry into bit 8.  If bit 8
            is set, this will carry into bit 16.  If bit 8 is clear,
            one of bits 9-15 must be set, so there will be a carry
            into bit 16.  Similarly, there will be a carry into bit
            24.  If one of bits 24-30 is set, there will be a carry
            into bit 31, so all of the hole bits will be changed.

            The one misfire occurs when bits 24-30 are clear and bit
            31 is set; in this case, the hole at bit 31 is not
            changed.  So it ignores everything except 128's, when
            they're aligned properly.

         3) But wait!  Aren't we looking for C, not zero?
            Good point.  So what we do is XOR LONGWORD with a longword,
            each of whose bytes is C.  This turns each byte that is C
            into a zero.  */

      longword = *longword_ptr++ ^ charmask;

      /* Add MAGIC_BITS to LONGWORD, set those bits that were unchanged
         by the addition, and look at only the hole bits.  If any of
         the hole bits are unchanged, most likely one of the bytes was
         a zero.  */
      if ((((longword + magic_bits) ^ ~longword) & ~magic_bits) != 0)
        {
          /* Which of the bytes was C?  If none of them were, it was
             a misfire; continue the search.  */
          const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);

          for (i = 0; i < sizeof longword; i++)
            if (cp[i] == c)
              return (void *) &cp[i];
        }

      n -= sizeof longword;
    }

  /* Fewer than one longword remains; finish byte by byte.  */
  char_ptr = (const unsigned char *) longword_ptr;

  while (n-- > 0)
    {
      if (*char_ptr == c)
        return (void *) char_ptr;
      else
        ++char_ptr;
    }

  return NULL;
}
#ifdef weak_alias
weak_alias (__memchr, BP_SYM (memchr))
#endif