Mercurial > hg > octave-kai > gnulib-hg
annotate lib/readtokens.c @ 12421:e8d2c6fc33ad
Use spaces for indentation, not tabs.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Thu, 10 Dec 2009 20:28:30 +0100 |
parents | 5442b2430859 |
children | b5e42ef33b49 |
rev | line source |
---|---|
375 | 1 /* readtokens.c -- Functions for reading tokens from an input stream. |
7302
8a1a9361108c
* _fpending.c: Include <config.h> unconditionally, since we no
Paul Eggert <eggert@cs.ucla.edu>
parents:
5848
diff
changeset
|
2 |
11395
5442b2430859
avoid gcc warnings about unused macro definitions
Jim Meyering <meyering@redhat.com>
parents:
9309
diff
changeset
|
3 Copyright (C) 1990-1991, 1999-2004, 2006, 2009 Free Software Foundation, Inc. |
375 | 4 |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
5 This program is free software: you can redistribute it and/or modify |
375 | 6 it under the terms of the GNU General Public License as published by |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
7 the Free Software Foundation; either version 3 of the License, or |
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
8 (at your option) any later version. |
375 | 9 |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU General Public License | |
9309
bbbbbf4cd1c5
Change copyright notice from GPLv2+ to GPLv3+.
Bruno Haible <bruno@clisp.org>
parents:
7302
diff
changeset
|
16 along with this program. If not, see <http://www.gnu.org/licenses/>. |
375 | 17 |
18 Written by Jim Meyering. */ | |
19 | |
20 /* This almost supersedes xreadline stuff -- using delim="\n" | |
21 gives the same functionality, except that these functions | |
4995 | 22 would never return empty lines. */ |
375 | 23 |
7302
8a1a9361108c
* _fpending.c: Include <config.h> unconditionally, since we no
Paul Eggert <eggert@cs.ucla.edu>
parents:
5848
diff
changeset
|
24 #include <config.h> |
375 | 25 |
4672 | 26 #include "readtokens.h" |
375 | 27 |
4672 | 28 #include <stdio.h> |
29 #include <stdlib.h> | |
30 #include <string.h> | |
4995 | 31 #include <stdbool.h> |
375 | 32 |
3622
7f1ff9bec452
Remove explicit declarations of xmalloc and xrealloc,
Jim Meyering <jim@meyering.net>
parents:
3618
diff
changeset
|
33 #include "xalloc.h" |
375 | 34 |
5318
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
35 #if USE_UNLOCKED_IO |
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
36 # include "unlocked-io.h" |
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
37 #endif |
7c24a825b51d
Remove dependencies on unlocked-io.
Paul Eggert <eggert@cs.ucla.edu>
parents:
5159
diff
changeset
|
38 |
375 | 39 /* Initialize a tokenbuffer. */ |
40 | |
41 void | |
4672 | 42 init_tokenbuffer (token_buffer *tokenbuffer) |
375 | 43 { |
4995 | 44 tokenbuffer->size = 0; |
45 tokenbuffer->buffer = NULL; | |
375 | 46 } |
47 | |
4995 | 48 /* Read a token from STREAM into TOKENBUFFER. |
49 A token is delimited by any of the N_DELIM bytes in DELIM. | |
375 | 50 Upon return, the token is in tokenbuffer->buffer and |
4995 | 51 has a trailing '\0' instead of any original delimiter. |
375 | 52 The function value is the length of the token not including |
4995 | 53 the final '\0'. Upon EOF (i.e. on the call after the last |
54 token is read) or error, return -1 without modifying tokenbuffer. | |
55 The EOF and error conditions may be distinguished in the caller | |
56 by testing ferror (STREAM). | |
375 | 57 |
4995 | 58 This function works properly on lines containing NUL bytes |
59 and on files do not end with a delimiter. */ | |
375 | 60 |
4995 | 61 size_t |
1778
c30d7ad98237
(readtoken, readtokens): Protoize.
Jim Meyering <jim@meyering.net>
parents:
653
diff
changeset
|
62 readtoken (FILE *stream, |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
63 const char *delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
64 size_t n_delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
65 token_buffer *tokenbuffer) |
375 | 66 { |
67 char *p; | |
4995 | 68 int c; |
69 size_t i, n; | |
375 | 70 static const char *saved_delim = NULL; |
71 static char isdelim[256]; | |
4995 | 72 bool same_delimiters; |
375 | 73 |
74 if (delim == NULL && saved_delim == NULL) | |
75 abort (); | |
76 | |
4995 | 77 same_delimiters = false; |
375 | 78 if (delim != saved_delim && saved_delim != NULL) |
79 { | |
4995 | 80 same_delimiters = true; |
375 | 81 for (i = 0; i < n_delim; i++) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
82 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
83 if (delim[i] != saved_delim[i]) |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
84 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
85 same_delimiters = false; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
86 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
87 } |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
88 } |
375 | 89 } |
90 | |
91 if (!same_delimiters) | |
92 { | |
4995 | 93 size_t j; |
375 | 94 saved_delim = delim; |
4995 | 95 memset (isdelim, 0, sizeof isdelim); |
96 for (j = 0; j < n_delim; j++) | |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
97 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
98 unsigned char ch = delim[j]; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
99 isdelim[ch] = 1; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
100 } |
375 | 101 } |
102 | |
4995 | 103 /* FIXME: don't fool with this caching. Use strchr instead. */ |
375 | 104 /* skip over any leading delimiters */ |
105 for (c = getc (stream); c >= 0 && isdelim[c]; c = getc (stream)) | |
106 { | |
107 /* empty */ | |
108 } | |
109 | |
4995 | 110 p = tokenbuffer->buffer; |
111 n = tokenbuffer->size; | |
112 i = 0; | |
375 | 113 for (;;) |
114 { | |
4995 | 115 if (c < 0 && i == 0) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
116 return -1; |
4995 | 117 |
118 if (i == n) | |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
119 p = x2nrealloc (p, &n, sizeof *p); |
4995 | 120 |
375 | 121 if (c < 0) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
122 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
123 p[i] = 0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
124 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
125 } |
375 | 126 if (isdelim[c]) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
127 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
128 p[i] = 0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
129 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
130 } |
375 | 131 p[i++] = c; |
132 c = getc (stream); | |
133 } | |
134 | |
135 tokenbuffer->buffer = p; | |
136 tokenbuffer->size = n; | |
4995 | 137 return i; |
375 | 138 } |
139 | |
4995 | 140 /* Build a NULL-terminated array of pointers to tokens |
141 read from STREAM. Return the number of tokens read. | |
142 All storage is obtained through calls to xmalloc-like functions. | |
375 | 143 |
144 %%% Question: is it worth it to do a single | |
145 %%% realloc() of `tokens' just before returning? */ | |
146 | |
4995 | 147 size_t |
1778
c30d7ad98237
(readtoken, readtokens): Protoize.
Jim Meyering <jim@meyering.net>
parents:
653
diff
changeset
|
148 readtokens (FILE *stream, |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
149 size_t projected_n_tokens, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
150 const char *delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
151 size_t n_delim, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
152 char ***tokens_out, |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
153 size_t **token_lengths) |
375 | 154 { |
155 token_buffer tb, *token = &tb; | |
156 char **tokens; | |
4995 | 157 size_t *lengths; |
158 size_t sz; | |
159 size_t n_tokens; | |
160 | |
161 if (projected_n_tokens == 0) | |
162 projected_n_tokens = 64; | |
163 else | |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
164 projected_n_tokens++; /* add one for trailing NULL pointer */ |
4995 | 165 |
166 sz = projected_n_tokens; | |
167 tokens = xnmalloc (sz, sizeof *tokens); | |
168 lengths = xnmalloc (sz, sizeof *lengths); | |
375 | 169 |
170 n_tokens = 0; | |
171 init_tokenbuffer (token); | |
172 for (;;) | |
173 { | |
174 char *tmp; | |
4995 | 175 size_t token_length = readtoken (stream, delim, n_delim, token); |
375 | 176 if (n_tokens >= sz) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
177 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
178 tokens = x2nrealloc (tokens, &sz, sizeof *tokens); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
179 lengths = xnrealloc (lengths, sz, sizeof *lengths); |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
180 } |
375 | 181 |
4995 | 182 if (token_length == (size_t) -1) |
12421
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
183 { |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
184 /* don't increment n_tokens for NULL entry */ |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
185 tokens[n_tokens] = NULL; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
186 lengths[n_tokens] = 0; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
187 break; |
e8d2c6fc33ad
Use spaces for indentation, not tabs.
Bruno Haible <bruno@clisp.org>
parents:
11395
diff
changeset
|
188 } |
4995 | 189 tmp = xnmalloc (token_length + 1, sizeof *tmp); |
375 | 190 lengths[n_tokens] = token_length; |
4995 | 191 tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1); |
375 | 192 n_tokens++; |
193 } | |
194 | |
195 free (token->buffer); | |
196 *tokens_out = tokens; | |
197 if (token_lengths != NULL) | |
198 *token_lengths = lengths; | |
199 return n_tokens; | |
200 } |