Mercurial > hg > octave-kai > gnulib-hg
annotate lib/mbuiter.h @ 9258:bee5960c276a
Rename search_.h to search.in.h.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Tue, 02 Oct 2007 00:24:48 +0200 |
parents | cbc204793cf7 |
children | bbbbbf4cd1c5 |
rev | line source |
---|---|
6055 | 1 /* Iterating through multibyte strings: macros for multi-byte encodings. |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
2 Copyright (C) 2001, 2005, 2007 Free Software Foundation, Inc. |
6055 | 3 |
4 This program is free software; you can redistribute it and/or modify | |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2, or (at your option) | |
7 any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software Foundation, | |
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
17 | |
18 /* Written by Bruno Haible <bruno@clisp.org>. */ | |
19 | |
20 /* The macros in this file implement forward iteration through a | |
21 multi-byte string, without knowing its length a-priori. | |
22 | |
23 With these macros, an iteration loop that looks like | |
24 | |
25 char *iter; | |
26 for (iter = buf; *iter != '\0'; iter++) | |
27 { | |
28 do_something (*iter); | |
29 } | |
30 | |
31 becomes | |
32 | |
33 mbui_iterator_t iter; | |
34 for (mbui_init (iter, buf); mbui_avail (iter); mbui_advance (iter)) | |
35 { | |
36 do_something (mbui_cur_ptr (iter), mb_len (mbui_cur (iter))); | |
37 } | |
38 | |
39 The benefit of these macros over plain use of mbrtowc is: | |
40 - Handling of invalid multibyte sequences is possible without | |
41 making the code more complicated, while still preserving the | |
42 invalid multibyte sequences. | |
43 | |
44 Compared to mbiter.h, the macros here don't need to know the string's | |
45 length a-priori. The downside is that at each step, the look-ahead | |
46 that guards against overrunning the terminating '\0' is more expensive. | |
47 The mbui_* macros are therefore suitable when there is a high probability | |
48 that only the first few multibyte characters need to be inspected. | |
49 Whereas the mbi_* macros are better if usually the iteration runs | |
50 through the entire string. | |
51 | |
52 mbui_iterator_t | |
53 is a type usable for variable declarations. | |
54 | |
55 mbui_init (iter, startptr) | |
56 initializes the iterator, starting at startptr. | |
57 | |
58 mbui_avail (iter) | |
59 returns true if there are more multibyte characters available before | |
60 the end of string is reached. In this case, mbui_cur (iter) is | |
61 initialized to the next multibyte character. | |
62 | |
63 mbui_advance (iter) | |
64 advances the iterator by one multibyte character. | |
65 | |
66 mbui_cur (iter) | |
67 returns the current multibyte character, of type mbchar_t. All the | |
68 macros defined in mbchar.h can be used on it. | |
69 | |
70 mbui_cur_ptr (iter) | |
71 returns a pointer to the beginning of the current multibyte character. | |
72 | |
73 mbui_reloc (iter, ptrdiff) | |
74 relocates iterator when the string is moved by ptrdiff bytes. | |
75 | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
76 mbui_copy (&destiter, &srciter) |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
77 copies srciter to destiter. |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
78 |
6055 | 79 Here are the function prototypes of the macros. |
80 | |
81 extern void mbui_init (mbui_iterator_t iter, const char *startptr); | |
82 extern bool mbui_avail (mbui_iterator_t iter); | |
83 extern void mbui_advance (mbui_iterator_t iter); | |
84 extern mbchar_t mbui_cur (mbui_iterator_t iter); | |
85 extern const char * mbui_cur_ptr (mbui_iterator_t iter); | |
86 extern void mbui_reloc (mbui_iterator_t iter, ptrdiff_t ptrdiff); | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
87 extern void mbui_copy (mbui_iterator_t *new, const mbui_iterator_t *old); |
6055 | 88 */ |
89 | |
90 #ifndef _MBUITER_H | |
91 #define _MBUITER_H 1 | |
92 | |
93 #include <assert.h> | |
94 #include <stdbool.h> | |
8966
cbc204793cf7
Include <stddef.h>, needed for ptrdiff_t.
Bruno Haible <bruno@clisp.org>
parents:
8127
diff
changeset
|
95 #include <stddef.h> |
6055 | 96 #include <stdlib.h> |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
97 #include <string.h> |
6055 | 98 |
99 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before | |
100 <wchar.h>. | |
101 BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before | |
102 <wchar.h>. */ | |
103 #include <stdio.h> | |
104 #include <time.h> | |
105 #include <wchar.h> | |
106 | |
107 #include "mbchar.h" | |
108 #include "strnlen1.h" | |
109 | |
110 struct mbuiter_multi | |
111 { | |
112 bool in_shift; /* true if next byte may not be interpreted as ASCII */ | |
113 mbstate_t state; /* if in_shift: current shift state */ | |
114 bool next_done; /* true if mbui_avail has already filled the following */ | |
115 struct mbchar cur; /* the current character: | |
116 const char *cur.ptr pointer to current character | |
117 The following are only valid after mbui_avail. | |
118 size_t cur.bytes number of bytes of current character | |
119 bool cur.wc_valid true if wc is a valid wide character | |
120 wchar_t cur.wc if wc_valid: the current character | |
121 */ | |
122 }; | |
123 | |
124 static inline void | |
125 mbuiter_multi_next (struct mbuiter_multi *iter) | |
126 { | |
127 if (iter->next_done) | |
128 return; | |
129 if (iter->in_shift) | |
130 goto with_shift; | |
131 /* Handle most ASCII characters quickly, without calling mbrtowc(). */ | |
132 if (is_basic (*iter->cur.ptr)) | |
133 { | |
134 /* These characters are part of the basic character set. ISO C 99 | |
135 guarantees that their wide character code is identical to their | |
136 char code. */ | |
137 iter->cur.bytes = 1; | |
138 iter->cur.wc = *iter->cur.ptr; | |
139 iter->cur.wc_valid = true; | |
140 } | |
141 else | |
142 { | |
143 assert (mbsinit (&iter->state)); | |
144 iter->in_shift = true; | |
145 with_shift: | |
146 iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr, | |
147 strnlen1 (iter->cur.ptr, MB_CUR_MAX), | |
148 &iter->state); | |
149 if (iter->cur.bytes == (size_t) -1) | |
150 { | |
151 /* An invalid multibyte sequence was encountered. */ | |
152 iter->cur.bytes = 1; | |
153 iter->cur.wc_valid = false; | |
154 /* Whether to set iter->in_shift = false and reset iter->state | |
155 or not is not very important; the string is bogus anyway. */ | |
156 } | |
157 else if (iter->cur.bytes == (size_t) -2) | |
158 { | |
159 /* An incomplete multibyte character at the end. */ | |
160 iter->cur.bytes = strlen (iter->cur.ptr); | |
161 iter->cur.wc_valid = false; | |
162 /* Whether to set iter->in_shift = false and reset iter->state | |
163 or not is not important; the string end is reached anyway. */ | |
164 } | |
165 else | |
166 { | |
167 if (iter->cur.bytes == 0) | |
168 { | |
169 /* A null wide character was encountered. */ | |
170 iter->cur.bytes = 1; | |
171 assert (*iter->cur.ptr == '\0'); | |
172 assert (iter->cur.wc == 0); | |
173 } | |
174 iter->cur.wc_valid = true; | |
175 | |
176 /* When in the initial state, we can go back treating ASCII | |
177 characters more quickly. */ | |
178 if (mbsinit (&iter->state)) | |
179 iter->in_shift = false; | |
180 } | |
181 } | |
182 iter->next_done = true; | |
183 } | |
184 | |
185 static inline void | |
186 mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff) | |
187 { | |
188 iter->cur.ptr += ptrdiff; | |
189 } | |
190 | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
191 static inline void |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
192 mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter) |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
193 { |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
194 if ((new_iter->in_shift = old_iter->in_shift)) |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
195 memcpy (&new_iter->state, &old_iter->state, sizeof (mbstate_t)); |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
196 else |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
197 memset (&new_iter->state, 0, sizeof (mbstate_t)); |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
198 new_iter->next_done = old_iter->next_done; |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
199 mb_copy (&new_iter->cur, &old_iter->cur); |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
200 } |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
201 |
/* Iteration macros.  */
typedef struct mbuiter_multi mbui_iterator_t;

/* Start iterating at STARTPTR, in the initial shift state, with no
   character decoded yet.  */
#define mbui_init(iter, startptr) \
  ((iter).cur.ptr = (startptr), \
   (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \
   (iter).next_done = false)

/* Decode the character at the current position (if not done already) and
   yield true unless it is the terminating NUL.  */
#define mbui_avail(iter) \
  (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur))

/* Step over the current character.  Its length must have been determined
   by a preceding mbui_avail.  */
#define mbui_advance(iter) \
  ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)

/* Access to the current character.  */
#define mbui_cur(iter) (iter).cur
#define mbui_cur_ptr(iter) (iter).cur.ptr

/* Relocation.  The arguments are parenthesized, like in the macros above,
   so that any expression can be passed.  */
#define mbui_reloc(iter, ptrdiff) mbuiter_multi_reloc (&(iter), (ptrdiff))

/* Copying an iterator.  Both arguments are pointers to iterators.  */
#define mbui_copy mbuiter_multi_copy
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
222 |
6055 | 223 #endif /* _MBUITER_H */ |