Dogcows Code - chaz/yoink/blob - yajl/src/yajl_parser.c
minor cleanups
[chaz/yoink] / yajl / src / yajl_parser.c
1 /*
2 * Copyright 2007-2009, Lloyd Hilaiel.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. Neither the name of Lloyd Hilaiel nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include "yajl_lex.h"
34 #include "yajl_parser.h"
35 #include "yajl_encode.h"
36 #include "yajl_bytestack.h"
37
38 #include <stdlib.h>
39 #include <limits.h>
40 #include <errno.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <ctype.h>
44 #include <assert.h>
45 #include <math.h>
46
47 unsigned char *
48 yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
49 unsigned int jsonTextLen, int verbose)
50 {
51 unsigned int offset = hand->errorOffset;
52 unsigned char * str;
53 const char * errorType = NULL;
54 const char * errorText = NULL;
55 char text[72];
56 const char * arrow = " (right here) ------^\n";
57
58 if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
59 errorType = "parse";
60 errorText = hand->parseError;
61 } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
62 errorType = "lexical";
63 errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
64 } else {
65 errorType = "unknown";
66 }
67
68 {
69 unsigned int memneeded = 0;
70 memneeded += strlen(errorType);
71 memneeded += strlen(" error");
72 if (errorText != NULL) {
73 memneeded += strlen(": ");
74 memneeded += strlen(errorText);
75 }
76 str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
77 str[0] = 0;
78 strcat((char *) str, errorType);
79 strcat((char *) str, " error");
80 if (errorText != NULL) {
81 strcat((char *) str, ": ");
82 strcat((char *) str, errorText);
83 }
84 strcat((char *) str, "\n");
85 }
86
87 /* now we append as many spaces as needed to make sure the error
88 * falls at char 41, if verbose was specified */
89 if (verbose) {
90 unsigned int start, end, i;
91 unsigned int spacesNeeded;
92
93 spacesNeeded = (offset < 30 ? 40 - offset : 10);
94 start = (offset >= 30 ? offset - 30 : 0);
95 end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
96
97 for (i=0;i<spacesNeeded;i++) text[i] = ' ';
98
99 for (;start < end;start++, i++) {
100 if (jsonText[start] != '\n' && jsonText[start] != '\r')
101 {
102 text[i] = jsonText[start];
103 }
104 else
105 {
106 text[i] = ' ';
107 }
108 }
109 assert(i <= 71);
110 text[i++] = '\n';
111 text[i] = 0;
112 {
113 char * newStr = (char *)
114 YA_MALLOC(&(hand->alloc), (strlen((char *) str) +
115 strlen((char *) text) +
116 strlen(arrow) + 1));
117 newStr[0] = 0;
118 strcat((char *) newStr, (char *) str);
119 strcat((char *) newStr, text);
120 strcat((char *) newStr, arrow);
121 YA_FREE(&(hand->alloc), str);
122 str = (unsigned char *) newStr;
123 }
124 }
125 return str;
126 }
127
/* check for client cancelation: if the callback result `x` is zero, put
 * the parser into the parse-error state, record the reason and return
 * yajl_status_client_canceled from the enclosing function.  Expects a
 * variable named `hand` in scope.  Wrapped in do { } while (0) so that
 * the macro behaves as a single statement (safe before an `else`). */
#define _CC_CHK(x)                                                      \
    do {                                                                \
        if (!(x)) {                                                     \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);      \
            hand->parseError =                                          \
                "client cancelled parse via callback return value";     \
            return yajl_status_client_canceled;                         \
        }                                                               \
    } while (0)
136
137
138 yajl_status
139 yajl_do_parse(yajl_handle hand, unsigned int * offset,
140 const unsigned char * jsonText, unsigned int jsonTextLen)
141 {
142 yajl_tok tok;
143 const unsigned char * buf;
144 unsigned int bufLen;
145
146 around_again:
147 switch (yajl_bs_current(hand->stateStack)) {
148 case yajl_state_parse_complete:
149 return yajl_status_ok;
150 case yajl_state_lexical_error:
151 case yajl_state_parse_error:
152 hand->errorOffset = *offset;
153 return yajl_status_error;
154 case yajl_state_start:
155 case yajl_state_map_need_val:
156 case yajl_state_array_need_val:
157 case yajl_state_array_start: {
158 /* for arrays and maps, we advance the state for this
159 * depth, then push the state of the next depth.
160 * If an error occurs during the parsing of the nesting
161 * enitity, the state at this level will not matter.
162 * a state that needs pushing will be anything other
163 * than state_start */
164 yajl_state stateToPush = yajl_state_start;
165
166 tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
167 offset, &buf, &bufLen);
168
169 switch (tok) {
170 case yajl_tok_eof:
171 return yajl_status_insufficient_data;
172 case yajl_tok_error:
173 yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
174 goto around_again;
175 case yajl_tok_string:
176 if (hand->callbacks && hand->callbacks->yajl_string) {
177 _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
178 buf, bufLen));
179 }
180 break;
181 case yajl_tok_string_with_escapes:
182 if (hand->callbacks && hand->callbacks->yajl_string) {
183 yajl_buf_clear(hand->decodeBuf);
184 yajl_string_decode(hand->decodeBuf, buf, bufLen);
185 _CC_CHK(hand->callbacks->yajl_string(
186 hand->ctx, yajl_buf_data(hand->decodeBuf),
187 yajl_buf_len(hand->decodeBuf)));
188 }
189 break;
190 case yajl_tok_bool:
191 if (hand->callbacks && hand->callbacks->yajl_boolean) {
192 _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
193 *buf == 't'));
194 }
195 break;
196 case yajl_tok_null:
197 if (hand->callbacks && hand->callbacks->yajl_null) {
198 _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
199 }
200 break;
201 case yajl_tok_left_bracket:
202 if (hand->callbacks && hand->callbacks->yajl_start_map) {
203 _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
204 }
205 stateToPush = yajl_state_map_start;
206 break;
207 case yajl_tok_left_brace:
208 if (hand->callbacks && hand->callbacks->yajl_start_array) {
209 _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
210 }
211 stateToPush = yajl_state_array_start;
212 break;
213 case yajl_tok_integer:
214 /*
215 * note. strtol does not respect the length of
216 * the lexical token. in a corner case where the
217 * lexed number is a integer with a trailing zero,
218 * immediately followed by the end of buffer,
219 * sscanf could run off into oblivion and cause a
220 * crash. for this reason we copy the integer
221 * (and doubles), into our parse buffer (the same
222 * one used for unescaping strings), before
223 * calling strtol. yajl_buf ensures null padding,
224 * so we're safe.
225 */
226 if (hand->callbacks) {
227 if (hand->callbacks->yajl_number) {
228 _CC_CHK(hand->callbacks->yajl_number(
229 hand->ctx,(const char *) buf, bufLen));
230 } else if (hand->callbacks->yajl_integer) {
231 long int i = 0;
232 yajl_buf_clear(hand->decodeBuf);
233 yajl_buf_append(hand->decodeBuf, buf, bufLen);
234 buf = yajl_buf_data(hand->decodeBuf);
235 i = strtol((const char *) buf, NULL, 10);
236 if ((i == LONG_MIN || i == LONG_MAX) &&
237 errno == ERANGE)
238 {
239 yajl_bs_set(hand->stateStack,
240 yajl_state_parse_error);
241 hand->parseError = "integer overflow" ;
242 /* try to restore error offset */
243 if (*offset >= bufLen) *offset -= bufLen;
244 else *offset = 0;
245 goto around_again;
246 }
247 _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
248 i));
249 }
250 }
251 break;
252 case yajl_tok_double:
253 if (hand->callbacks) {
254 if (hand->callbacks->yajl_number) {
255 _CC_CHK(hand->callbacks->yajl_number(
256 hand->ctx, (const char *) buf, bufLen));
257 } else if (hand->callbacks->yajl_double) {
258 double d = 0.0;
259 yajl_buf_clear(hand->decodeBuf);
260 yajl_buf_append(hand->decodeBuf, buf, bufLen);
261 buf = yajl_buf_data(hand->decodeBuf);
262 d = strtod((char *) buf, NULL);
263 if ((d == HUGE_VAL || d == -HUGE_VAL) &&
264 errno == ERANGE)
265 {
266 yajl_bs_set(hand->stateStack,
267 yajl_state_parse_error);
268 hand->parseError = "numeric (floating point) "
269 "overflow";
270 /* try to restore error offset */
271 if (*offset >= bufLen) *offset -= bufLen;
272 else *offset = 0;
273 goto around_again;
274 }
275 _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
276 d));
277 }
278 }
279 break;
280 case yajl_tok_right_brace: {
281 if (yajl_bs_current(hand->stateStack) ==
282 yajl_state_array_start)
283 {
284 if (hand->callbacks &&
285 hand->callbacks->yajl_end_array)
286 {
287 _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
288 }
289 yajl_bs_pop(hand->stateStack);
290 goto around_again;
291 }
292 /* intentional fall-through */
293 }
294 case yajl_tok_colon:
295 case yajl_tok_comma:
296 case yajl_tok_right_bracket:
297 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
298 hand->parseError =
299 "unallowed token at this point in JSON text";
300 goto around_again;
301 default:
302 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
303 hand->parseError = "invalid token, internal error";
304 goto around_again;
305 }
306 /* got a value. transition depends on the state we're in. */
307 {
308 yajl_state s = yajl_bs_current(hand->stateStack);
309 if (s == yajl_state_start) {
310 yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
311 } else if (s == yajl_state_map_need_val) {
312 yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
313 } else {
314 yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
315 }
316 }
317 if (stateToPush != yajl_state_start) {
318 yajl_bs_push(hand->stateStack, stateToPush);
319 }
320
321 goto around_again;
322 }
323 case yajl_state_map_start:
324 case yajl_state_map_need_key: {
325 /* only difference between these two states is that in
326 * start '}' is valid, whereas in need_key, we've parsed
327 * a comma, and a string key _must_ follow */
328 tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
329 offset, &buf, &bufLen);
330 switch (tok) {
331 case yajl_tok_eof:
332 return yajl_status_insufficient_data;
333 case yajl_tok_error:
334 yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
335 goto around_again;
336 case yajl_tok_string_with_escapes:
337 if (hand->callbacks && hand->callbacks->yajl_map_key) {
338 yajl_buf_clear(hand->decodeBuf);
339 yajl_string_decode(hand->decodeBuf, buf, bufLen);
340 buf = yajl_buf_data(hand->decodeBuf);
341 bufLen = yajl_buf_len(hand->decodeBuf);
342 }
343 /* intentional fall-through */
344 case yajl_tok_string:
345 if (hand->callbacks && hand->callbacks->yajl_map_key) {
346 _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
347 bufLen));
348 }
349 yajl_bs_set(hand->stateStack, yajl_state_map_sep);
350 goto around_again;
351 case yajl_tok_right_bracket:
352 if (yajl_bs_current(hand->stateStack) ==
353 yajl_state_map_start)
354 {
355 if (hand->callbacks && hand->callbacks->yajl_end_map) {
356 _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
357 }
358 yajl_bs_pop(hand->stateStack);
359 goto around_again;
360 }
361 default:
362 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
363 hand->parseError =
364 "invalid object key (must be a string)";
365 goto around_again;
366 }
367 }
368 case yajl_state_map_sep: {
369 tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
370 offset, &buf, &bufLen);
371 switch (tok) {
372 case yajl_tok_colon:
373 yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
374 goto around_again;
375 case yajl_tok_eof:
376 return yajl_status_insufficient_data;
377 case yajl_tok_error:
378 yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
379 goto around_again;
380 default:
381 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
382 hand->parseError = "object key and value must "
383 "be separated by a colon (':')";
384 goto around_again;
385 }
386 }
387 case yajl_state_map_got_val: {
388 tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
389 offset, &buf, &bufLen);
390 switch (tok) {
391 case yajl_tok_right_bracket:
392 if (hand->callbacks && hand->callbacks->yajl_end_map) {
393 _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
394 }
395 yajl_bs_pop(hand->stateStack);
396 goto around_again;
397 case yajl_tok_comma:
398 yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
399 goto around_again;
400 case yajl_tok_eof:
401 return yajl_status_insufficient_data;
402 case yajl_tok_error:
403 yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
404 goto around_again;
405 default:
406 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
407 hand->parseError = "after key and value, inside map, "
408 "I expect ',' or '}'";
409 /* try to restore error offset */
410 if (*offset >= bufLen) *offset -= bufLen;
411 else *offset = 0;
412 goto around_again;
413 }
414 }
415 case yajl_state_array_got_val: {
416 tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
417 offset, &buf, &bufLen);
418 switch (tok) {
419 case yajl_tok_right_brace:
420 if (hand->callbacks && hand->callbacks->yajl_end_array) {
421 _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
422 }
423 yajl_bs_pop(hand->stateStack);
424 goto around_again;
425 case yajl_tok_comma:
426 yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
427 goto around_again;
428 case yajl_tok_eof:
429 return yajl_status_insufficient_data;
430 case yajl_tok_error:
431 yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
432 goto around_again;
433 default:
434 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
435 hand->parseError =
436 "after array element, I expect ',' or ']'";
437 goto around_again;
438 }
439 }
440 }
441
442 abort();
443 return yajl_status_error;
444 }
445
This page took 0.051424 seconds and 4 git commands to generate.