GRASS GIS 8 Programmer's Manual 8.2.0(2022)-exported
token.c
Go to the documentation of this file.
1
2/*!
3 \file lib/gis/token.c
4
5 \brief GIS Library - Tokenize strings
6
7 (C) 2001-2008, 2011-2013 by the GRASS Development Team
8
9 This program is free software under the GNU General Public License
10 (>=v2). Read the file COPYING that comes with GRASS for details.
11
12 \author USA CERL and others
13*/
14
15#include <stdlib.h>
16#include <string.h>
17#include <grass/gis.h>
18#include <grass/glocale.h>
19
20static char **tokenize(const char *, const char *, const char *);
21
/*!
   \brief Tokenize string

   Splits a private copy of <em>buf</em> into tokens; <em>buf</em>
   itself is not modified. Every character listed in <em>delim</em>
   acts as a separator, so <em>delim</em> may consist of more than one
   character. Each separator closes the current token and opens a new
   one, which means adjacent separators produce empty tokens and the
   result always contains at least one token. <em>buf</em> must not
   contain a new line (\\n).

   The returned array is terminated by a NULL pointer.
   G_free_tokens() must be called when finished with the tokens to
   release memory.

   Example:
   \code
   char **tokens;
   int ntok, i;

   tokens = G_tokenize(buf, " |:,");
   ntok = G_number_of_tokens(tokens);
   for (i = 0; i < ntok; i++) {
       G_debug(1, "%d=[%s]", i, tokens[i]);
   }
   G_free_tokens(tokens);
   \endcode

   \param buf input string
   \param delim string of delimiter characters

   \return NULL-terminated array of pointers to tokens
*/
char **G_tokenize(const char *buf, const char *delim)
{
    /* no quoting character: every delimiter splits the string */
    const char *no_quote = NULL;

    return tokenize(buf, delim, no_quote);
}
52
/*!
   \brief Tokenize string

   This function behaves similarly to G_tokenize().

   It introduces <em>valchar</em>, whose first character is used as a
   quoting character that defines the borders of a token. Between a
   pair of quoting characters <em>delim</em> is ignored. The quoting
   characters themselves are not copied into the token; two quoting
   characters in a row inside a quoted section stand for one literal
   quoting character.

   If a quoted section is never closed, or a closing quoting character
   is followed by anything other than a delimiter, another quoting
   character or the end of the string, a "parse error" warning is
   issued and the tokens collected so far are returned.

   Example:
   \code
   char *str = "a,'b,c',d";

   char **tokens1, **tokens2;
   int ntok1, ntok2;

   tokens1 = G_tokenize(str, ",");
   ntok1 = G_number_of_tokens(tokens1);

   tokens2 = G_tokenize2(str, ",", "'");
   ntok2 = G_number_of_tokens(tokens2);

   G_free_tokens(tokens1);
   G_free_tokens(tokens2);
   \endcode

   In this example <em>ntok1</em> will be 4, <em>ntok2</em> only 3,
   i.e. { "a", "b,c", "d" }

   \param buf input string
   \param delim string of delimiter characters
   \param valchar character defining border of token

   \return NULL-terminated array of pointers to tokens
*/
char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
{
    return tokenize(buf, delim, valchar);
}
88
/*!
   \brief Split a copy of a string into tokens (shared implementation)

   Copies <em>buf</em> and walks the copy with a small state machine,
   compacting the token text in place and replacing each separator by
   a terminating '\0'. All tokens point into that single copy, whose
   start is tokens[0]; the pointer array is terminated by NULL.

   States:
    - S_START: outside quotes. A quote character opens a quoted
      section (and is dropped), a delimiter starts a new token.
    - S_IN_QUOTE: inside quotes. Delimiters are copied literally; a
      quote character closes the section (and is dropped). Reaching
      the end of the string here is a parse error.
    - S_AFTER_QUOTE: directly after a closing quote. Another quote
      character means an escaped (doubled) quote, so one quote is
      copied and the quoted section continues. A delimiter starts a
      new token. Any other character is a parse error.

   On a parse error a warning is issued and the tokens collected so
   far (including the partial current one) are returned.

   \param buf input string (not modified)
   \param delim string of delimiter characters
   \param inchar string whose first character is the quote character;
                 NULL or an empty string disables quoting

   \return NULL-terminated array of pointers to tokens
*/
static char **tokenize(const char *buf, const char *delim, const char *inchar)
{
    enum {
        S_START,
        S_IN_QUOTE,
        S_AFTER_QUOTE
    };
    enum {
        A_NO_OP,
        A_ADD_CHAR,
        A_NEW_FIELD,
        A_END_RECORD,
        A_ERROR
    };
    int i;
    char **tokens;
    const char *p;
    char *q;
    int state;

    /* Characters are compared as unsigned char values (0..UCHAR_MAX), so
       the -1 "no quote" sentinel can never match a real byte. An empty
       inchar is treated like NULL: otherwise the quote would be '\0',
       the terminator would be taken for an opening quote and the scan
       would run past the end of the buffer. */
    int quo = (inchar && *inchar) ? (unsigned char)*inchar : -1;

    /* do not modify buf, make a copy */
    p = q = G_store(buf);

    i = 0;
    tokens = (char **)G_malloc(2 * sizeof(char *));

    /* always one token */
    tokens[i++] = q;

    for (state = S_START;; p++) {
        int c = (unsigned char)*p;
        int action = A_NO_OP;

        switch (state) {
        case S_START:
            if (c == quo)
                state = S_IN_QUOTE;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c))
                action = A_NEW_FIELD;
            else
                action = A_ADD_CHAR;
            break;
        case S_IN_QUOTE:
            if (c == quo)
                state = S_AFTER_QUOTE;
            else if (c == '\0')
                action = A_ERROR; /* unterminated quoted section */
            else
                action = A_ADD_CHAR;
            break;
        case S_AFTER_QUOTE:
            if (c == quo)
                /* doubled quote: keep one and stay inside the quotes */
                state = S_IN_QUOTE, action = A_ADD_CHAR;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c))
                state = S_START, action = A_NEW_FIELD;
            else
                action = A_ERROR; /* text directly after a closing quote */
            break;
        }

        switch (action) {
        case A_NO_OP:
            break;
        case A_ADD_CHAR:
            /* q never overtakes p, so compacting in place is safe */
            *q++ = *p;
            break;
        case A_NEW_FIELD:
            *q++ = '\0';
            tokens[i++] = q;
            /* keep room for one more token plus the NULL terminator */
            tokens = G_realloc(tokens, (i + 2) * sizeof(char *));
            break;
        case A_END_RECORD:
            *q++ = '\0';
            tokens[i++] = NULL;
            return tokens;
        case A_ERROR:
            G_warning(_("parse error"));
            *q++ = '\0';
            tokens[i++] = NULL;
            return tokens;
        }
    }
}
176
/*!
   \brief Return number of tokens

   Counts the entries of a token array up to, but not including, the
   terminating NULL pointer.

   \param tokens NULL-terminated array of token pointers

   \return number of tokens
*/
int G_number_of_tokens(char **tokens)
{
    int count = 0;

    while (tokens[count] != NULL) {
        count++;
    }

    return count;
}
195
/*!
   \brief Free memory allocated to tokens.

   <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
   tokens to release memory.

   All tokens point into one buffer whose start is tokens[0], so only
   that buffer and the pointer array itself are released. Passing NULL
   is allowed and does nothing.

   \param[out] tokens token array to release (may be NULL)
*/
void G_free_tokens(char **tokens)
{
    if (tokens == NULL) {
        return;
    }

    /* tokens[0] is the start of the single buffer shared by all tokens */
    if (tokens[0] != NULL) {
        G_free(tokens[0]);
    }
    G_free(tokens);
}
void G_free(void *buf)
Free allocated memory.
Definition: alloc.c:149
#define NULL
Definition: ccmath.h:32
void G_warning(const char *msg,...)
Print a warning message to stderr.
Definition: gis/error.c:204
struct state state
Definition: parser.c:103
char * G_store(const char *s)
Copy string to allocated memory.
Definition: strings.c:87
char ** G_tokenize2(const char *buf, const char *delim, const char *valchar)
Tokenize string.
Definition: token.c:84
void G_free_tokens(char **tokens)
Free memory allocated to tokens.
Definition: token.c:204
char ** G_tokenize(const char *buf, const char *delim)
Tokenize string.
Definition: token.c:48
int G_number_of_tokens(char **tokens)
Return number of tokens.
Definition: token.c:185