Mercurial > hg > octave-nkf > gnulib-hg
annotate lib/mbuiter.h @ 9659:5680cf5b5595
md2: clarify comments to say that alignment is not required.
* lib/md2.h: Remove warning about alignment in comment.
* lib/md2.c (md2_read_ctx, md2_finish_ctx): Doc fix, alignment
has never been required.
author | Simon Josefsson <simon@josefsson.org> |
---|---|
date | Thu, 31 Jan 2008 11:10:36 +0100 |
parents | bbbbbf4cd1c5 |
children | e8d2c6fc33ad |
rev | line source |
---|---|
6055 | 1 /* Iterating through multibyte strings: macros for multi-byte encodings. |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
2 Copyright (C) 2001, 2005, 2007 Free Software Foundation, Inc. |
6055 | 3 |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8966
diff
changeset
|
4 This program is free software: you can redistribute it and/or modify |
6055 | 5 it under the terms of the GNU General Public License as published by |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8966
diff
changeset
|
6 the Free Software Foundation; either version 3 of the License, or |
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8966
diff
changeset
|
7 (at your option) any later version. |
6055 | 8 |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
8966
diff
changeset
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
6055 | 16 |
17 /* Written by Bruno Haible <bruno@clisp.org>. */ | |
18 | |
19 /* The macros in this file implement forward iteration through a | |
20 multi-byte string, without knowing its length a priori. | |
21 | |
22 With these macros, an iteration loop that looks like | |
23 | |
24 char *iter; | |
25 for (iter = buf; *iter != '\0'; iter++) | |
26 { | |
27 do_something (*iter); | |
28 } | |
29 | |
30 becomes | |
31 | |
32 mbui_iterator_t iter; | |
33 for (mbui_init (iter, buf); mbui_avail (iter); mbui_advance (iter)) | |
34 { | |
35 do_something (mbui_cur_ptr (iter), mb_len (mbui_cur (iter))); | |
36 } | |
37 | |
38 The benefit of these macros over plain use of mbrtowc is: | |
39 - Handling of invalid multibyte sequences is possible without | |
40 making the code more complicated, while still preserving the | |
41 invalid multibyte sequences. | |
42 | |
43 Compared to mbiter.h, the macros here don't need to know the string's | |
44 length a priori. The downside is that at each step, the look-ahead | |
45 that guards against overrunning the terminating '\0' is more expensive. | |
46 The mbui_* macros are therefore suitable when there is a high probability | |
47 that only the first few multibyte characters need to be inspected, | |
48 whereas the mbi_* macros are better if the iteration usually runs | |
49 through the entire string. | |
50 | |
51 mbui_iterator_t | |
52 is a type usable for variable declarations. | |
53 | |
54 mbui_init (iter, startptr) | |
55 initializes the iterator, starting at startptr. | |
56 | |
57 mbui_avail (iter) | |
58 returns true if there are more multibyte characters available before | |
59 the end of string is reached. In this case, mbui_cur (iter) is | |
60 initialized to the next multibyte character. | |
61 | |
62 mbui_advance (iter) | |
63 advances the iterator by one multibyte character. | |
64 | |
65 mbui_cur (iter) | |
66 returns the current multibyte character, of type mbchar_t. All the | |
67 macros defined in mbchar.h can be used on it. | |
68 | |
69 mbui_cur_ptr (iter) | |
70 returns a pointer to the beginning of the current multibyte character. | |
71 | |
72 mbui_reloc (iter, ptrdiff) | |
73 relocates iterator when the string is moved by ptrdiff bytes. | |
74 | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
75 mbui_copy (&destiter, &srciter) |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
76 copies srciter to destiter. |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
77 |
6055 | 78 Here are the function prototypes of the macros. |
79 | |
80 extern void mbui_init (mbui_iterator_t iter, const char *startptr); | |
81 extern bool mbui_avail (mbui_iterator_t iter); | |
82 extern void mbui_advance (mbui_iterator_t iter); | |
83 extern mbchar_t mbui_cur (mbui_iterator_t iter); | |
84 extern const char * mbui_cur_ptr (mbui_iterator_t iter); | |
85 extern void mbui_reloc (mbui_iterator_t iter, ptrdiff_t ptrdiff); | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
86 extern void mbui_copy (mbui_iterator_t *new, const mbui_iterator_t *old); |
6055 | 87 */ |
88 | |
89 #ifndef _MBUITER_H | |
90 #define _MBUITER_H 1 | |
91 | |
92 #include <assert.h> | |
93 #include <stdbool.h> | |
8966
cbc204793cf7
Include <stddef.h>, needed for ptrdiff_t.
Bruno Haible <bruno@clisp.org>
parents:
8127
diff
changeset
|
94 #include <stddef.h> |
6055 | 95 #include <stdlib.h> |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
96 #include <string.h> |
6055 | 97 |
98 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before | |
99 <wchar.h>. | |
100 BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before | |
101 <wchar.h>. */ | |
102 #include <stdio.h> | |
103 #include <time.h> | |
104 #include <wchar.h> | |
105 | |
106 #include "mbchar.h" | |
107 #include "strnlen1.h" | |
108 | |
109 struct mbuiter_multi | |
110 { | |
111 bool in_shift; /* true if next byte may not be interpreted as ASCII */ | |
112 mbstate_t state; /* if in_shift: current shift state */ | |
113 bool next_done; /* true if mbui_avail has already filled the following */ | |
114 struct mbchar cur; /* the current character: | |
115 const char *cur.ptr pointer to current character | |
116 The following are only valid after mbui_avail. | |
117 size_t cur.bytes number of bytes of current character | |
118 bool cur.wc_valid true if wc is a valid wide character | |
119 wchar_t cur.wc if wc_valid: the current character | |
120 */ | |
121 }; | |
122 | |
123 static inline void | |
124 mbuiter_multi_next (struct mbuiter_multi *iter) | |
125 { | |
126 if (iter->next_done) | |
127 return; | |
128 if (iter->in_shift) | |
129 goto with_shift; | |
130 /* Handle most ASCII characters quickly, without calling mbrtowc(). */ | |
131 if (is_basic (*iter->cur.ptr)) | |
132 { | |
133 /* These characters are part of the basic character set. ISO C 99 | |
134 guarantees that their wide character code is identical to their | |
135 char code. */ | |
136 iter->cur.bytes = 1; | |
137 iter->cur.wc = *iter->cur.ptr; | |
138 iter->cur.wc_valid = true; | |
139 } | |
140 else | |
141 { | |
142 assert (mbsinit (&iter->state)); | |
143 iter->in_shift = true; | |
144 with_shift: | |
145 iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr, | |
146 strnlen1 (iter->cur.ptr, MB_CUR_MAX), | |
147 &iter->state); | |
148 if (iter->cur.bytes == (size_t) -1) | |
149 { | |
150 /* An invalid multibyte sequence was encountered. */ | |
151 iter->cur.bytes = 1; | |
152 iter->cur.wc_valid = false; | |
153 /* Whether to set iter->in_shift = false and reset iter->state | |
154 or not is not very important; the string is bogus anyway. */ | |
155 } | |
156 else if (iter->cur.bytes == (size_t) -2) | |
157 { | |
158 /* An incomplete multibyte character at the end. */ | |
159 iter->cur.bytes = strlen (iter->cur.ptr); | |
160 iter->cur.wc_valid = false; | |
161 /* Whether to set iter->in_shift = false and reset iter->state | |
162 or not is not important; the string end is reached anyway. */ | |
163 } | |
164 else | |
165 { | |
166 if (iter->cur.bytes == 0) | |
167 { | |
168 /* A null wide character was encountered. */ | |
169 iter->cur.bytes = 1; | |
170 assert (*iter->cur.ptr == '\0'); | |
171 assert (iter->cur.wc == 0); | |
172 } | |
173 iter->cur.wc_valid = true; | |
174 | |
175 /* When in the initial state, we can go back treating ASCII | |
176 characters more quickly. */ | |
177 if (mbsinit (&iter->state)) | |
178 iter->in_shift = false; | |
179 } | |
180 } | |
181 iter->next_done = true; | |
182 } | |
183 | |
184 static inline void | |
185 mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff) | |
186 { | |
187 iter->cur.ptr += ptrdiff; | |
188 } | |
189 | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
190 static inline void |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
191 mbuiter_multi_copy (struct mbuiter_multi *new_iter, const struct mbuiter_multi *old_iter) |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
192 { |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
193 if ((new_iter->in_shift = old_iter->in_shift)) |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
194 memcpy (&new_iter->state, &old_iter->state, sizeof (mbstate_t)); |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
195 else |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
196 memset (&new_iter->state, 0, sizeof (mbstate_t)); |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
197 new_iter->next_done = old_iter->next_done; |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
198 mb_copy (&new_iter->cur, &old_iter->cur); |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
199 } |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
200 |
6055 | 201 /* Iteration macros. */ |
202 typedef struct mbuiter_multi mbui_iterator_t; | |
203 #define mbui_init(iter, startptr) \ | |
204 ((iter).cur.ptr = (startptr), \ | |
205 (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \ | |
206 (iter).next_done = false) | |
207 #define mbui_avail(iter) \ | |
208 (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur)) | |
209 #define mbui_advance(iter) \ | |
210 ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false) | |
211 | |
212 /* Access to the current character. */ | |
213 #define mbui_cur(iter) (iter).cur | |
214 #define mbui_cur_ptr(iter) (iter).cur.ptr | |
215 | |
216 /* Relocation. */ | |
217 #define mbui_reloc(iter, ptrdiff) mbuiter_multi_reloc (&iter, ptrdiff) | |
218 | |
8127
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
219 /* Copying an iterator. */ |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
220 #define mbui_copy mbuiter_multi_copy |
127a096061c8
Support for copying multibyte string iterators.
Bruno Haible <bruno@clisp.org>
parents:
6055
diff
changeset
|
221 |
6055 | 222 #endif /* _MBUITER_H */ |