]> Dogcows Code - chaz/yoink/blob - yajl/src/yajl_lex.c
f689ff6fad2e1ceae3e002b015fac2d778eefa11
[chaz/yoink] / yajl / src / yajl_lex.c
1 /*
2 * Copyright 2007-2009, Lloyd Hilaiel.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. Neither the name of Lloyd Hilaiel nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include "yajl_lex.h"
34 #include "yajl_buf.h"
35
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <assert.h>
39 #include <string.h>
40
41 #ifdef YAJL_LEXER_DEBUG
42 static const char *
43 tokToStr(yajl_tok tok)
44 {
45 switch (tok) {
46 case yajl_tok_bool: return "bool";
47 case yajl_tok_colon: return "colon";
48 case yajl_tok_comma: return "comma";
49 case yajl_tok_eof: return "eof";
50 case yajl_tok_error: return "error";
51 case yajl_tok_left_brace: return "brace";
52 case yajl_tok_left_bracket: return "bracket";
53 case yajl_tok_null: return "null";
54 case yajl_tok_integer: return "integer";
55 case yajl_tok_double: return "double";
56 case yajl_tok_right_brace: return "brace";
57 case yajl_tok_right_bracket: return "bracket";
58 case yajl_tok_string: return "string";
59 case yajl_tok_string_with_escapes: return "string_with_escapes";
60 }
61 return "unknown";
62 }
63 #endif
64
/* Impact of the stream parsing feature on the lexer:
 *
 * YAJL supports stream parsing. That is, the ability to parse the first
 * bits of a chunk of JSON before the last bits are available (still on
 * the network or disk). This makes the lexer more complex. The
 * responsibility of the lexer is to handle transparently the case where
 * a chunk boundary falls in the middle of a token. This is
 * accomplished via a buffer and a character reading abstraction.
 *
 * Overview of implementation
 *
 * When we lex to the end of the input string before the end of a token is
 * hit, we copy all of the input text composing the token into our lexBuf.
 *
 * Every time we read a character, we do so through readChar.
 * readChar's responsibility is to handle pulling all chars from the buffer
 * before pulling chars from the input text.
 */
83
84 struct yajl_lexer_t {
85 /* the overal line and char offset into the data */
86 unsigned int lineOff;
87 unsigned int charOff;
88
89 /* error */
90 yajl_lex_error error;
91
92 /* a input buffer to handle the case where a token is spread over
93 * multiple chunks */
94 yajl_buf buf;
95
96 /* in the case where we have data in the lexBuf, bufOff holds
97 * the current offset into the lexBuf. */
98 unsigned int bufOff;
99
100 /* are we using the lex buf? */
101 unsigned int bufInUse;
102
103 /* shall we allow comments? */
104 unsigned int allowComments;
105
106 /* shall we validate utf8 inside strings? */
107 unsigned int validateUTF8;
108
109 yajl_alloc_funcs * alloc;
110 };
111
/* readChar(lxr, txt, off): yield the next input byte and advance.
 *
 * While the lexer buffer is in use and still has unread bytes, the byte
 * comes from the buffer and lxr->bufOff is advanced; otherwise it comes
 * from txt[*off] and *off is advanced.
 *
 * NOTE: this is a macro and evaluates `lxr` and `off` more than once, so
 * arguments must be free of side effects.  Every use of a macro parameter
 * is parenthesized so that any expression may be passed safely. */
#define readChar(lxr, txt, off)                                            \
    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) &&                       \
      (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ?                          \
     (*((const unsigned char *) yajl_buf_data((lxr)->buf) +                \
        ((lxr)->bufOff)++)) :                                              \
     ((txt)[(*(off))++]))

/* unreadChar(lxr, off): step back over the byte most recently read.
 * The input-text offset is decremented when it is non-zero; otherwise the
 * byte is taken to have come from the lexer buffer and lxr->bufOff is
 * decremented instead. */
#define unreadChar(lxr, off) \
    ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
118
119 yajl_lexer
120 yajl_lex_alloc(yajl_alloc_funcs * alloc,
121 unsigned int allowComments, unsigned int validateUTF8)
122 {
123 yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
124 memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
125 lxr->buf = yajl_buf_alloc(alloc);
126 lxr->allowComments = allowComments;
127 lxr->validateUTF8 = validateUTF8;
128 lxr->alloc = alloc;
129 return lxr;
130 }
131
132 void
133 yajl_lex_free(yajl_lexer lxr)
134 {
135 yajl_buf_free(lxr->buf);
136 YA_FREE(lxr->alloc, lxr);
137 return;
138 }
139
/* A per-byte lookup table of flag bits used while lexing strings:
 *   VEC - valid escaped control char (may follow a backslash)
 *   IJC - invalid json char (may not appear unescaped inside a string)
 *   VHC - valid hex char (may appear in a \u escape)
 *
 * note: the solidus '/' carries VEC but not IJC, so it may be escaped
 *       or not.
 * note: '"' and '\' carry both VEC and IJC: they are valid after a
 *       backslash and invalid when they appear unescaped.
 */
#define VEC 1
#define IJC 2
#define VHC 4
static const char charLookupTable[256] =
{
    /* 0x00 - 0x1f: every byte below space is an invalid json char */
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,
    IJC, IJC, IJC, IJC, IJC, IJC, IJC, IJC,

    /* 0x20 - 0x27: double quote at 0x22 */
    0, 0, VEC|IJC, 0, 0, 0, 0, 0,
    /* 0x28 - 0x2f: solidus at 0x2f */
    0, 0, 0, 0, 0, 0, 0, VEC,
    /* 0x30 - 0x37: digits 0 through 7 */
    VHC, VHC, VHC, VHC, VHC, VHC, VHC, VHC,
    /* 0x38 - 0x3f: digits 8 and 9 */
    VHC, VHC, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x47: upper case A through F */
    0, VHC, VHC, VHC, VHC, VHC, VHC, 0,
    /* 0x48 - 0x4f */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 - 0x57 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x58 - 0x5f: backslash at 0x5c */
    0, 0, 0, 0, VEC|IJC, 0, 0, 0,

    /* 0x60 - 0x67: lower case a through f; b and f are also escapes */
    0, VHC, VEC|VHC, VHC, VHC, VHC, VEC|VHC, 0,
    /* 0x68 - 0x6f: n at 0x6e */
    0, 0, 0, 0, 0, 0, VEC, 0,
    /* 0x70 - 0x77: r at 0x72, t at 0x74 */
    0, 0, VEC, 0, VEC, 0, 0, 0,
    /* 0x78 - 0x7f */
    0, 0, 0, 0, 0, 0, 0, 0

    /* 0x80 - 0xff: no flags.  These entries are zero-initialized
     * implicitly; the table still spans all 256 byte values so lookups
     * never need a range check. */
};
193
194 /** process a variable length utf8 encoded codepoint.
195 *
196 * returns:
197 * yajl_tok_string - if valid utf8 char was parsed and offset was
198 * advanced
199 * yajl_tok_eof - if end of input was hit before validation could
200 * complete
201 * yajl_tok_error - if invalid utf8 was encountered
202 *
203 * NOTE: on error the offset will point to the first char of the
204 * invalid utf8 */
205 #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
206
207 static yajl_tok
208 yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
209 unsigned int jsonTextLen, unsigned int * offset,
210 unsigned char curChar)
211 {
212 if (curChar <= 0x7f) {
213 /* single byte */
214 return yajl_tok_string;
215 } else if ((curChar >> 5) == 0x6) {
216 /* two byte */
217 UTF8_CHECK_EOF;
218 curChar = readChar(lexer, jsonText, offset);
219 if ((curChar >> 6) == 0x2) return yajl_tok_string;
220 } else if ((curChar >> 4) == 0x0e) {
221 /* three byte */
222 UTF8_CHECK_EOF;
223 curChar = readChar(lexer, jsonText, offset);
224 if ((curChar >> 6) == 0x2) {
225 UTF8_CHECK_EOF;
226 curChar = readChar(lexer, jsonText, offset);
227 if ((curChar >> 6) == 0x2) return yajl_tok_string;
228 }
229 } else if ((curChar >> 3) == 0x1e) {
230 /* four byte */
231 UTF8_CHECK_EOF;
232 curChar = readChar(lexer, jsonText, offset);
233 if ((curChar >> 6) == 0x2) {
234 UTF8_CHECK_EOF;
235 curChar = readChar(lexer, jsonText, offset);
236 if ((curChar >> 6) == 0x2) {
237 UTF8_CHECK_EOF;
238 curChar = readChar(lexer, jsonText, offset);
239 if ((curChar >> 6) == 0x2) return yajl_tok_string;
240 }
241 }
242 }
243
244 return yajl_tok_error;
245 }
246
247 /* lex a string. input is the lexer, pointer to beginning of
248 * json text, and start of string (offset).
249 * a token is returned which has the following meanings:
250 * yajl_tok_string: lex of string was successful. offset points to
251 * terminating '"'.
252 * yajl_tok_eof: end of text was encountered before we could complete
253 * the lex.
254 * yajl_tok_error: embedded in the string were unallowable chars. offset
255 * points to the offending char
256 */
257 #define STR_CHECK_EOF \
258 if (*offset >= jsonTextLen) { \
259 tok = yajl_tok_eof; \
260 goto finish_string_lex; \
261 }
262
263 static yajl_tok
264 yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
265 unsigned int jsonTextLen, unsigned int * offset)
266 {
267 yajl_tok tok = yajl_tok_error;
268 int hasEscapes = 0;
269
270 for (;;) {
271 unsigned char curChar;
272
273 STR_CHECK_EOF;
274
275 curChar = readChar(lexer, jsonText, offset);
276
277 /* quote terminates */
278 if (curChar == '"') {
279 tok = yajl_tok_string;
280 break;
281 }
282 /* backslash escapes a set of control chars, */
283 else if (curChar == '\\') {
284 hasEscapes = 1;
285 STR_CHECK_EOF;
286
287 /* special case \u */
288 curChar = readChar(lexer, jsonText, offset);
289 if (curChar == 'u') {
290 unsigned int i = 0;
291
292 for (i=0;i<4;i++) {
293 STR_CHECK_EOF;
294 curChar = readChar(lexer, jsonText, offset);
295 if (!(charLookupTable[curChar] & VHC)) {
296 /* back up to offending char */
297 unreadChar(lexer, offset);
298 lexer->error = yajl_lex_string_invalid_hex_char;
299 goto finish_string_lex;
300 }
301 }
302 } else if (!(charLookupTable[curChar] & VEC)) {
303 /* back up to offending char */
304 unreadChar(lexer, offset);
305 lexer->error = yajl_lex_string_invalid_escaped_char;
306 goto finish_string_lex;
307 }
308 }
309 /* when not validating UTF8 it's a simple table lookup to determine
310 * if the present character is invalid */
311 else if(charLookupTable[curChar] & IJC) {
312 /* back up to offending char */
313 unreadChar(lexer, offset);
314 lexer->error = yajl_lex_string_invalid_json_char;
315 goto finish_string_lex;
316 }
317 /* when in validate UTF8 mode we need to do some extra work */
318 else if (lexer->validateUTF8) {
319 yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
320 offset, curChar);
321
322 if (t == yajl_tok_eof) {
323 tok = yajl_tok_eof;
324 goto finish_string_lex;
325 } else if (t == yajl_tok_error) {
326 lexer->error = yajl_lex_string_invalid_utf8;
327 goto finish_string_lex;
328 }
329 }
330 /* accept it, and move on */
331 }
332 finish_string_lex:
333 /* tell our buddy, the parser, wether he needs to process this string
334 * again */
335 if (hasEscapes && tok == yajl_tok_string) {
336 tok = yajl_tok_string_with_escapes;
337 }
338
339 return tok;
340 }
341
342 #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
343
344 static yajl_tok
345 yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
346 unsigned int jsonTextLen, unsigned int * offset)
347 {
348 /** XXX: numbers are the only entities in json that we must lex
349 * _beyond_ in order to know that they are complete. There
350 * is an ambiguous case for integers at EOF. */
351
352 unsigned char c;
353
354 yajl_tok tok = yajl_tok_integer;
355
356 RETURN_IF_EOF;
357 c = readChar(lexer, jsonText, offset);
358
359 /* optional leading minus */
360 if (c == '-') {
361 RETURN_IF_EOF;
362 c = readChar(lexer, jsonText, offset);
363 }
364
365 /* a single zero, or a series of integers */
366 if (c == '0') {
367 RETURN_IF_EOF;
368 c = readChar(lexer, jsonText, offset);
369 } else if (c >= '1' && c <= '9') {
370 do {
371 RETURN_IF_EOF;
372 c = readChar(lexer, jsonText, offset);
373 } while (c >= '0' && c <= '9');
374 } else {
375 unreadChar(lexer, offset);
376 lexer->error = yajl_lex_missing_integer_after_minus;
377 return yajl_tok_error;
378 }
379
380 /* optional fraction (indicates this is floating point) */
381 if (c == '.') {
382 int numRd = 0;
383
384 RETURN_IF_EOF;
385 c = readChar(lexer, jsonText, offset);
386
387 while (c >= '0' && c <= '9') {
388 numRd++;
389 RETURN_IF_EOF;
390 c = readChar(lexer, jsonText, offset);
391 }
392
393 if (!numRd) {
394 unreadChar(lexer, offset);
395 lexer->error = yajl_lex_missing_integer_after_decimal;
396 return yajl_tok_error;
397 }
398 tok = yajl_tok_double;
399 }
400
401 /* optional exponent (indicates this is floating point) */
402 if (c == 'e' || c == 'E') {
403 RETURN_IF_EOF;
404 c = readChar(lexer, jsonText, offset);
405
406 /* optional sign */
407 if (c == '+' || c == '-') {
408 RETURN_IF_EOF;
409 c = readChar(lexer, jsonText, offset);
410 }
411
412 if (c >= '0' && c <= '9') {
413 do {
414 RETURN_IF_EOF;
415 c = readChar(lexer, jsonText, offset);
416 } while (c >= '0' && c <= '9');
417 } else {
418 unreadChar(lexer, offset);
419 lexer->error = yajl_lex_missing_integer_after_exponent;
420 return yajl_tok_error;
421 }
422 tok = yajl_tok_double;
423 }
424
425 /* we always go "one too far" */
426 unreadChar(lexer, offset);
427
428 return tok;
429 }
430
431 static yajl_tok
432 yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
433 unsigned int jsonTextLen, unsigned int * offset)
434 {
435 unsigned char c;
436
437 yajl_tok tok = yajl_tok_comment;
438
439 RETURN_IF_EOF;
440 c = readChar(lexer, jsonText, offset);
441
442 /* either slash or star expected */
443 if (c == '/') {
444 /* now we throw away until end of line */
445 do {
446 RETURN_IF_EOF;
447 c = readChar(lexer, jsonText, offset);
448 } while (c != '\n');
449 } else if (c == '*') {
450 /* now we throw away until end of comment */
451 for (;;) {
452 RETURN_IF_EOF;
453 c = readChar(lexer, jsonText, offset);
454 if (c == '*') {
455 RETURN_IF_EOF;
456 c = readChar(lexer, jsonText, offset);
457 if (c == '/') {
458 break;
459 } else {
460 unreadChar(lexer, offset);
461 }
462 }
463 }
464 } else {
465 lexer->error = yajl_lex_invalid_char;
466 tok = yajl_tok_error;
467 }
468
469 return tok;
470 }
471
472 yajl_tok
473 yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
474 unsigned int jsonTextLen, unsigned int * offset,
475 const unsigned char ** outBuf, unsigned int * outLen)
476 {
477 yajl_tok tok = yajl_tok_error;
478 unsigned char c;
479 unsigned int startOffset = *offset;
480
481 *outBuf = NULL;
482 *outLen = 0;
483
484 for (;;) {
485 assert(*offset <= jsonTextLen);
486
487 if (*offset >= jsonTextLen) {
488 tok = yajl_tok_eof;
489 goto lexed;
490 }
491
492 c = readChar(lexer, jsonText, offset);
493
494 switch (c) {
495 case '{':
496 tok = yajl_tok_left_bracket;
497 goto lexed;
498 case '}':
499 tok = yajl_tok_right_bracket;
500 goto lexed;
501 case '[':
502 tok = yajl_tok_left_brace;
503 goto lexed;
504 case ']':
505 tok = yajl_tok_right_brace;
506 goto lexed;
507 case ',':
508 tok = yajl_tok_comma;
509 goto lexed;
510 case ':':
511 tok = yajl_tok_colon;
512 goto lexed;
513 case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
514 startOffset++;
515 break;
516 case 't': {
517 const char * want = "rue";
518 do {
519 if (*offset >= jsonTextLen) {
520 tok = yajl_tok_eof;
521 goto lexed;
522 }
523 c = readChar(lexer, jsonText, offset);
524 if (c != *want) {
525 unreadChar(lexer, offset);
526 lexer->error = yajl_lex_invalid_string;
527 tok = yajl_tok_error;
528 goto lexed;
529 }
530 } while (*(++want));
531 tok = yajl_tok_bool;
532 goto lexed;
533 }
534 case 'f': {
535 const char * want = "alse";
536 do {
537 if (*offset >= jsonTextLen) {
538 tok = yajl_tok_eof;
539 goto lexed;
540 }
541 c = readChar(lexer, jsonText, offset);
542 if (c != *want) {
543 unreadChar(lexer, offset);
544 lexer->error = yajl_lex_invalid_string;
545 tok = yajl_tok_error;
546 goto lexed;
547 }
548 } while (*(++want));
549 tok = yajl_tok_bool;
550 goto lexed;
551 }
552 case 'n': {
553 const char * want = "ull";
554 do {
555 if (*offset >= jsonTextLen) {
556 tok = yajl_tok_eof;
557 goto lexed;
558 }
559 c = readChar(lexer, jsonText, offset);
560 if (c != *want) {
561 unreadChar(lexer, offset);
562 lexer->error = yajl_lex_invalid_string;
563 tok = yajl_tok_error;
564 goto lexed;
565 }
566 } while (*(++want));
567 tok = yajl_tok_null;
568 goto lexed;
569 }
570 case '"': {
571 tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
572 jsonTextLen, offset);
573 goto lexed;
574 }
575 case '-':
576 case '0': case '1': case '2': case '3': case '4':
577 case '5': case '6': case '7': case '8': case '9': {
578 /* integer parsing wants to start from the beginning */
579 unreadChar(lexer, offset);
580 tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
581 jsonTextLen, offset);
582 goto lexed;
583 }
584 case '/':
585 /* hey, look, a probable comment! If comments are disabled
586 * it's an error. */
587 if (!lexer->allowComments) {
588 unreadChar(lexer, offset);
589 lexer->error = yajl_lex_unallowed_comment;
590 tok = yajl_tok_error;
591 goto lexed;
592 }
593 /* if comments are enabled, then we should try to lex
594 * the thing. possible outcomes are
595 * - successful lex (tok_comment, which means continue),
596 * - malformed comment opening (slash not followed by
597 * '*' or '/') (tok_error)
598 * - eof hit. (tok_eof) */
599 tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
600 jsonTextLen, offset);
601 if (tok == yajl_tok_comment) {
602 /* "error" is silly, but that's the initial
603 * state of tok. guilty until proven innocent. */
604 tok = yajl_tok_error;
605 yajl_buf_clear(lexer->buf);
606 lexer->bufInUse = 0;
607 startOffset = *offset;
608 break;
609 }
610 /* hit error or eof, bail */
611 goto lexed;
612 default:
613 lexer->error = yajl_lex_invalid_char;
614 tok = yajl_tok_error;
615 goto lexed;
616 }
617 }
618
619
620 lexed:
621 /* need to append to buffer if the buffer is in use or
622 * if it's an EOF token */
623 if (tok == yajl_tok_eof || lexer->bufInUse) {
624 if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
625 lexer->bufInUse = 1;
626 yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
627 lexer->bufOff = 0;
628
629 if (tok != yajl_tok_eof) {
630 *outBuf = yajl_buf_data(lexer->buf);
631 *outLen = yajl_buf_len(lexer->buf);
632 lexer->bufInUse = 0;
633 }
634 } else if (tok != yajl_tok_error) {
635 *outBuf = jsonText + startOffset;
636 *outLen = *offset - startOffset;
637 }
638
639 /* special case for strings. skip the quotes. */
640 if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
641 {
642 assert(*outLen >= 2);
643 (*outBuf)++;
644 *outLen -= 2;
645 }
646
647
648 #ifdef YAJL_LEXER_DEBUG
649 if (tok == yajl_tok_error) {
650 printf("lexical error: %s\n",
651 yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
652 } else if (tok == yajl_tok_eof) {
653 printf("EOF hit\n");
654 } else {
655 printf("lexed %s: '", tokToStr(tok));
656 fwrite(*outBuf, 1, *outLen, stdout);
657 printf("'\n");
658 }
659 #endif
660
661 return tok;
662 }
663
664 const char *
665 yajl_lex_error_to_string(yajl_lex_error error)
666 {
667 switch (error) {
668 case yajl_lex_e_ok:
669 return "ok, no error";
670 case yajl_lex_string_invalid_utf8:
671 return "invalid bytes in UTF8 string.";
672 case yajl_lex_string_invalid_escaped_char:
673 return "inside a string, '\\' occurs before a character "
674 "which it may not.";
675 case yajl_lex_string_invalid_json_char:
676 return "invalid character inside string.";
677 case yajl_lex_string_invalid_hex_char:
678 return "invalid (non-hex) character occurs after '\\u' inside "
679 "string.";
680 case yajl_lex_invalid_char:
681 return "invalid char in json text.";
682 case yajl_lex_invalid_string:
683 return "invalid string in json text.";
684 case yajl_lex_missing_integer_after_exponent:
685 return "malformed number, a digit is required after the exponent.";
686 case yajl_lex_missing_integer_after_decimal:
687 return "malformed number, a digit is required after the "
688 "decimal point.";
689 case yajl_lex_missing_integer_after_minus:
690 return "malformed number, a digit is required after the "
691 "minus sign.";
692 case yajl_lex_unallowed_comment:
693 return "probable comment found in input text, comments are "
694 "not enabled.";
695 }
696 return "unknown error code";
697 }
698
699
700 /** allows access to more specific information about the lexical
701 * error when yajl_lex_lex returns yajl_tok_error. */
702 yajl_lex_error
703 yajl_lex_get_error(yajl_lexer lexer)
704 {
705 if (lexer == NULL) return (yajl_lex_error) -1;
706 return lexer->error;
707 }
708
709 unsigned int yajl_lex_current_line(yajl_lexer lexer)
710 {
711 return lexer->lineOff;
712 }
713
714 unsigned int yajl_lex_current_char(yajl_lexer lexer)
715 {
716 return lexer->charOff;
717 }
718
719 yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
720 unsigned int jsonTextLen, unsigned int offset)
721 {
722 const unsigned char * outBuf;
723 unsigned int outLen;
724 unsigned int bufLen = yajl_buf_len(lexer->buf);
725 unsigned int bufOff = lexer->bufOff;
726 unsigned int bufInUse = lexer->bufInUse;
727 yajl_tok tok;
728
729 tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
730 &outBuf, &outLen);
731
732 lexer->bufOff = bufOff;
733 lexer->bufInUse = bufInUse;
734 yajl_buf_truncate(lexer->buf, bufLen);
735
736 return tok;
737 }
This page took 0.061723 seconds and 3 git commands to generate.