GRASS GIS 7 Programmer's Manual  7.9.dev(2021)-e5379bbd7
gis/token.c
Go to the documentation of this file.
1 
2 /*!
3  \file lib/gis/token.c
4 
5  \brief GIS Library - Tokenize strings
6 
7  (C) 2001-2008, 2011-2013 by the GRASS Development Team
8 
9  This program is free software under the GNU General Public License
10  (>=v2). Read the file COPYING that comes with GRASS for details.
11 
12  \author USA CERL and others
13 */
14 
15 #include <stdlib.h>
16 #include <string.h>
17 #include <grass/gis.h>
18 #include <grass/glocale.h>
19 
20 static char **tokenize(const char *, const char *, const char *);
21 
/*!
   \brief Tokenize string

   Splits a copy of <em>buf</em> into tokens at every character that
   occurs in <em>delim</em>; <em>buf</em> itself is left untouched.
   In the copy each delimiter is overwritten with '\0' (NUL) and the
   returned array holds one pointer per token, followed by a
   terminating NULL pointer. Two adjacent delimiters yield an empty
   token. <em>buf</em> must not contain a new line (\n).
   <em>delim</em> may consist of more than one character, each of
   which acts as a delimiter on its own. G_free_tokens() must be
   called when finished with tokens to release memory.

   Example:
   \code
   char **tokens;
   int ntok, i;
   tokens = G_tokenize(buf, " |:,");
   ntok = G_number_of_tokens(tokens);
   for (i=0; i < ntok; i++) {
       G_debug(1, "%d=[%s]", i, tokens[i]);
   }
   G_free_tokens(tokens);
   \endcode

   \param buf input string
   \param delim string of delimiter characters

   \return NULL-terminated array of pointers to the tokens
 */
char **G_tokenize(const char *buf, const char *delim)
{
    /* no quote character: every delimiter splits the input */
    char **tokens = tokenize(buf, delim, NULL);

    return tokens;
}
52 
/*!
   \brief Tokenize string

   This function behaves similarly to G_tokenize().

   It introduces <em>valchar</em>, whose first character is used as a
   quote character marking the borders of a token. Between a pair of
   quote characters <em>delim</em> is ignored. The quote characters
   themselves are not copied into the token; a doubled quote
   character inside a quoted section produces one literal quote
   character. If a quote is left open, or a closing quote is followed
   by anything other than a delimiter, another quote or the end of
   the string, a "parse error" warning is issued and the tokens
   collected so far are returned.

   Example:
   \code
   char *str = "a,'b,c',d";

   char **tokens1, **tokens2;
   int ntok1, ntok2;

   tokens1 = G_tokenize(str, ",");
   ntok1 = G_number_of_tokens(tokens1);

   tokens2 = G_tokenize2(str, ",", "'");
   ntok2 = G_number_of_tokens(tokens2);
   \endcode

   In this example <em>ntok1</em> will be 4, <em>ntok2</em> only 3,
   i.e. { "a", "b,c", "d" }

   \param buf input string
   \param delim string of delimiter characters
   \param valchar string whose first character defines the border of a token

   \return NULL-terminated array of pointers to the tokens
 */
char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
{
    /* same splitter as G_tokenize(), with quoting enabled */
    char **tokens = tokenize(buf, delim, valchar);

    return tokens;
}
88 
/*!
   \brief Split a string into tokens (implementation of G_tokenize()
   and G_tokenize2())

   Works on a private copy of <em>buf</em> made with G_store(). The
   copy is scanned once by a small state machine and rewritten in
   place: delimiters become '\0', quote characters are dropped, and a
   pointer to the start of every token is appended to the result
   array. All tokens therefore live in the single buffer that
   tokens[0] points to.

   Quoting rules (only when a quote character is given):
   - a quote character in unquoted text opens a quoted section;
   - inside a quoted section delimiters are ordinary characters;
   - a doubled quote character inside a quoted section yields one
     literal quote character;
   - after a closing quote only a delimiter, another quote or the end
     of the string may follow, otherwise it is a parse error;
   - an unterminated quoted section is a parse error.

   On a parse error a warning is issued and the tokens collected so
   far are returned.

   \param buf input string (not modified)
   \param delim string of delimiter characters
   \param inchar string whose first character is the quote character;
   NULL or an empty string disables quoting

   \return NULL-terminated array of pointers to the tokens
 */
static char **tokenize(const char *buf, const char *delim, const char *inchar)
{
    enum { S_START, S_IN_QUOTE, S_AFTER_QUOTE };
    enum { A_NO_OP, A_ADD_CHAR, A_NEW_FIELD, A_END_RECORD, A_ERROR };
    int i;
    char **tokens;
    const char *p;              /* read position in the copy */
    char *q;                    /* write position in the copy (never ahead of p) */
    int state;

    /* -1 means "no quote character". An empty inchar must be treated
       the same way: otherwise the quote would be '\0' and the string
       terminator would open a quoted section, sending the scan past
       the end of the buffer. Characters are converted through
       unsigned char so that byte 0xFF cannot compare equal to -1. */
    int quo = (inchar && *inchar) ? (unsigned char)*inchar : -1;

    /* do not modify buf, make a copy */
    p = q = G_store(buf);

    i = 0;
    tokens = (char **)G_malloc(2 * sizeof(char *));

    /* always one token */
    tokens[i++] = q;

    for (state = S_START;; p++) {
        int c = (unsigned char)*p;
        int action = A_NO_OP;

        switch (state) {
        case S_START:
            if (c == quo)
                state = S_IN_QUOTE;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c))
                action = A_NEW_FIELD;
            else
                action = A_ADD_CHAR;
            break;
        case S_IN_QUOTE:
            if (c == quo)
                state = S_AFTER_QUOTE;
            else if (c == '\0')
                action = A_ERROR;       /* quote never closed */
            else
                action = A_ADD_CHAR;
            break;
        case S_AFTER_QUOTE:
            if (c == quo) {
                /* doubled quote: keep one and stay inside the quotes */
                state = S_IN_QUOTE;
                action = A_ADD_CHAR;
            }
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, c)) {
                state = S_START;
                action = A_NEW_FIELD;
            }
            else
                action = A_ERROR;       /* text directly after closing quote */
            break;
        }

        switch (action) {
        case A_NO_OP:
            break;
        case A_ADD_CHAR:
            *q++ = *p;
            break;
        case A_NEW_FIELD:
            *q++ = '\0';
            tokens[i++] = q;
            /* keep room for the next token and the NULL terminator */
            tokens = G_realloc(tokens, (i + 2) * sizeof(char *));
            break;
        case A_END_RECORD:
            *q = '\0';
            tokens[i] = NULL;
            return tokens;
        case A_ERROR:
            G_warning(_("parse error"));
            *q = '\0';
            tokens[i] = NULL;
            return tokens;
        }
    }
}
176 
/*!
   \brief Return number of tokens

   Counts the entries of a token array up to, but not including, the
   terminating NULL pointer.

   \param tokens NULL-terminated array of token pointers, e.g. as
   returned by G_tokenize(); may be NULL

   \return number of tokens, 0 if <em>tokens</em> is NULL
 */
int G_number_of_tokens(char **tokens)
{
    int n = 0;

    /* a missing array simply holds no tokens */
    if (tokens == NULL)
        return 0;

    while (tokens[n] != NULL)
        n++;

    return n;
}
195 
/*!
   \brief Free memory allocated to tokens.

   <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
   tokens to release memory.

   Releases the string buffer that <em>tokens[0]</em> points to and
   then the pointer array itself. The individual tokens must not be
   freed separately. Passing NULL is a no-op.

   \param[in] tokens token array returned by G_tokenize() or
   G_tokenize2(); may be NULL
 */
void G_free_tokens(char **tokens)
{
    if (tokens == NULL)
        return;

    /* the first token marks the start of the one buffer shared by all
       tokens, so freeing it releases every token string */
    if (tokens[0] != NULL)
        G_free(tokens[0]);
    G_free(tokens);
}
#define G_malloc(n)
Definition: defs/gis.h:112
char ** G_tokenize(const char *buf, const char *delim)
Tokenize string.
Definition: gis/token.c:48
void G_free(void *)
Free allocated memory.
Definition: gis/alloc.c:149
#define NULL
Definition: ccmath.h:32
int G_number_of_tokens(char **tokens)
Return number of tokens.
Definition: gis/token.c:185
void G_warning(const char *,...) __attribute__((format(printf
void G_free_tokens(char **tokens)
Free memory allocated to tokens.
Definition: gis/token.c:204
#define G_realloc(p, n)
Definition: defs/gis.h:114
#define _(str)
Definition: glocale.h:10
char * G_store(const char *)
Copy string to allocated memory.
Definition: strings.c:87
char ** G_tokenize2(const char *buf, const char *delim, const char *valchar)
Tokenize string.
Definition: gis/token.c:84
struct state state
Definition: parser.c:103