Mercurial > hg > octave-kai > gnulib-hg
annotate lib/readtokens.c @ 17476:6057744acd2c default tip master
autoupdate
author | Karl Berry <karl@freefriends.org> |
---|---|
date | Fri, 16 Aug 2013 06:32:22 -0700 |
parents | e542fd46ad6f |
children |
rev | line source |
---|---|
375 | 1 /* readtokens.c -- Functions for reading tokens from an input stream. |
7302
8a1a9361108c
* _fpending.c: Include <config.h> unconditionally, since we no
Paul Eggert <eggert@cs.ucla.edu>
parents:
5848
diff
changeset
|
2 |
17249
e542fd46ad6f
maint: update all copyright year number ranges
Eric Blake <eblake@redhat.com>
parents:
16630
diff
changeset
|
3 Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2013 Free Software |
12559
c2cbabec01dd
update nearly all FSF copyright year lists to include 2010
Jim Meyering <meyering@redhat.com>
parents:
12518
diff
changeset
|
4 Foundation, Inc. |
375 | 5 |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
6 This program is free software: you can redistribute it and/or modify |
375 | 7 it under the terms of the GNU General Public License as published by |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
8 the Free Software Foundation; either version 3 of the License, or |
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
9 (at your option) any later version. |
375 | 10 |
11 This program is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
17 along with this program. If not, see <http://www.gnu.org/licenses/>. |
375 | 18 |
19 Written by Jim Meyering. */ | |
20 | |
16358 | 21 /* This almost supersedes xreadline stuff -- using delim="\n" |
375 | 22 gives the same functionality, except that these functions |
4995 | 23 would never return empty lines. */ |
375 | 24 |
7302
8a1a9361108c
* _fpending.c: Include <config.h> unconditionally, since we no
Paul Eggert <eggert@cs.ucla.edu>
parents:
5848
diff
changeset
|
25 #include <config.h> |
375 | 26 |
4672 | 27 #include "readtokens.h" |
375 | 28 |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
29 #include <limits.h> |
4672 | 30 #include <stdio.h> |
31 #include <stdlib.h> | |
32 #include <string.h> | |
4995 | 33 #include <stdbool.h> |
375 | 34 |
12582
21afac8020be
readtokens: this module *does* require xalloc.h
Jim Meyering <meyering@redhat.com>
parents:
12577
diff
changeset
|
35 #include "xalloc.h" |
21afac8020be
readtokens: this module *does* require xalloc.h
Jim Meyering <meyering@redhat.com>
parents:
12577
diff
changeset
|
36 |
5318
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
37 #if USE_UNLOCKED_IO |
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
38 # include "unlocked-io.h" |
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
39 #endif |
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
40 |
375 | 41 /* Initialize a tokenbuffer. */ |
42 | |
43 void | |
4672 | 44 init_tokenbuffer (token_buffer *tokenbuffer) |
375 | 45 { |
4995 | 46 tokenbuffer->size = 0; |
47 tokenbuffer->buffer = NULL; | |
375 | 48 } |
49 | |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
/* Storage unit of a bitset.  A bitset is an array of 'word's, each of
   which contributes bits_per_word bits; bit N lives in element
   N / bits_per_word at position N % bits_per_word.  */
typedef size_t word;
enum { bits_per_word = CHAR_BIT * sizeof (word) };

/* Return true if bit N of BITSET is set, false otherwise.
   BITSET must have at least N / bits_per_word + 1 elements.  */
static bool
get_nth_bit (size_t n, word const *bitset)
{
  word const cell = bitset[n / bits_per_word];
  size_t const shift = n % bits_per_word;

  return (cell >> shift) & 1;
}

/* Set bit N of BITSET, leaving every other bit untouched.
   BITSET must have at least N / bits_per_word + 1 elements.  */
static void
set_nth_bit (size_t n, word *bitset)
{
  /* Build the mask in a 'word' so the shift is done at full width.  */
  word const mask = (word) 1 << (n % bits_per_word);

  bitset[n / bits_per_word] |= mask;
}
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
65 |
4995 | 66 /* Read a token from STREAM into TOKENBUFFER. |
67 A token is delimited by any of the N_DELIM bytes in DELIM. | |
375 | 68 Upon return, the token is in tokenbuffer->buffer and |
4995 | 69 has a trailing '\0' instead of any original delimiter. |
375 | 70 The function value is the length of the token not including |
4995 | 71 the final '\0'. Upon EOF (i.e. on the call after the last |
72 token is read) or error, return -1 without modifying tokenbuffer. | |
73 The EOF and error conditions may be distinguished in the caller | |
74 by testing ferror (STREAM). | |
375 | 75 |
4995 | 76 This function works properly on lines containing NUL bytes |
16630
1ac24dbbff5a
readtokens.c: adjust wording in a comment
Jim Meyering <meyering@redhat.com>
parents:
16596
diff
changeset
|
77 and on files that do not end with a delimiter. */ |
375 | 78 |
4995 | 79 size_t |
1778
c30d7ad98237
(readtoken, readtokens): Protoize.
Jim Meyering <jim@meyering.net>
parents:
653
diff
changeset
|
80 readtoken (FILE *stream, |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
81 const char *delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
82 size_t n_delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
83 token_buffer *tokenbuffer) |
375 | 84 { |
85 char *p; | |
4995 | 86 int c; |
87 size_t i, n; | |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
88 word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word]; |
375 | 89 |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
90 memset (isdelim, 0, sizeof isdelim); |
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
91 for (i = 0; i < n_delim; i++) |
375 | 92 { |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
93 unsigned char ch = delim[i]; |
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
94 set_nth_bit (ch, isdelim); |
375 | 95 } |
96 | |
97 /* skip over any leading delimiters */ | |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
98 for (c = getc (stream); c >= 0 && get_nth_bit (c, isdelim); c = getc (stream)) |
375 | 99 { |
100 /* empty */ | |
101 } | |
102 | |
4995 | 103 p = tokenbuffer->buffer; |
104 n = tokenbuffer->size; | |
105 i = 0; | |
375 | 106 for (;;) |
107 { | |
4995 | 108 if (c < 0 && i == 0) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
109 return -1; |
4995 | 110 |
111 if (i == n) | |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
112 p = x2nrealloc (p, &n, sizeof *p); |
4995 | 113 |
375 | 114 if (c < 0) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
115 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
116 p[i] = 0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
117 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
118 } |
16596
e2c60dd50c61
readtokens: avoid core dumps with unusual calling patterns
Paul Eggert <eggert@cs.ucla.edu>
parents:
16358
diff
changeset
|
119 if (get_nth_bit (c, isdelim)) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
120 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
121 p[i] = 0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
122 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
123 } |
375 | 124 p[i++] = c; |
125 c = getc (stream); | |
126 } | |
127 | |
128 tokenbuffer->buffer = p; | |
129 tokenbuffer->size = n; | |
4995 | 130 return i; |
375 | 131 } |
132 | |
4995 | 133 /* Build a NULL-terminated array of pointers to tokens |
134 read from STREAM. Return the number of tokens read. | |
135 All storage is obtained through calls to xmalloc-like functions. | |
375 | 136 |
137 %%% Question: is it worth it to do a single | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
138 %%% realloc() of 'tokens' just before returning? */ |
375 | 139 |
4995 | 140 size_t |
1778
c30d7ad98237
(readtoken, readtokens): Protoize.
Jim Meyering <jim@meyering.net>
parents:
653
diff
changeset
|
141 readtokens (FILE *stream, |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
142 size_t projected_n_tokens, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
143 const char *delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
144 size_t n_delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
145 char ***tokens_out, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
146 size_t **token_lengths) |
375 | 147 { |
148 token_buffer tb, *token = &tb; | |
149 char **tokens; | |
4995 | 150 size_t *lengths; |
151 size_t sz; | |
152 size_t n_tokens; | |
153 | |
154 if (projected_n_tokens == 0) | |
155 projected_n_tokens = 64; | |
156 else | |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
157 projected_n_tokens++; /* add one for trailing NULL pointer */ |
4995 | 158 |
159 sz = projected_n_tokens; | |
160 tokens = xnmalloc (sz, sizeof *tokens); | |
161 lengths = xnmalloc (sz, sizeof *lengths); | |
375 | 162 |
163 n_tokens = 0; | |
164 init_tokenbuffer (token); | |
165 for (;;) | |
166 { | |
167 char *tmp; | |
4995 | 168 size_t token_length = readtoken (stream, delim, n_delim, token); |
375 | 169 if (n_tokens >= sz) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
170 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
171 tokens = x2nrealloc (tokens, &sz, sizeof *tokens); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
172 lengths = xnrealloc (lengths, sz, sizeof *lengths); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
173 } |
375 | 174 |
4995 | 175 if (token_length == (size_t) -1) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
176 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
177 /* don't increment n_tokens for NULL entry */ |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
178 tokens[n_tokens] = NULL; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
179 lengths[n_tokens] = 0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
180 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
181 } |
4995 | 182 tmp = xnmalloc (token_length + 1, sizeof *tmp); |
375 | 183 lengths[n_tokens] = token_length; |
4995 | 184 tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1); |
375 | 185 n_tokens++; |
186 } | |
187 | |
188 free (token->buffer); | |
189 *tokens_out = tokens; | |
190 if (token_lengths != NULL) | |
191 *token_lengths = lengths; | |
12747
366ef0bb3316
readtokens: do not leak internal token_lengths buffer
Jim Meyering <meyering@redhat.com>
parents:
12582
diff
changeset
|
192 else |
366ef0bb3316
readtokens: do not leak internal token_lengths buffer
Jim Meyering <meyering@redhat.com>
parents:
12582
diff
changeset
|
193 free (lengths); |
375 | 194 return n_tokens; |
195 } |