Dogcows Code - chaz/yoink/blob - yajl/src/yajl_encode.c
minor cleanups
[chaz/yoink] / yajl / src / yajl_encode.c
1 /*
2 * Copyright 2007-2009, Lloyd Hilaiel.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. Neither the name of Lloyd Hilaiel nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include "yajl_encode.h"
34
35 #include <assert.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39
/*
 * Write the two uppercase hexadecimal digits of byte `c` into hexBuf[0]
 * (high nibble) and hexBuf[1] (low nibble).  No terminator is written;
 * the caller owns whatever follows those two bytes.
 */
static void CharToHex(unsigned char c, char * hexBuf)
{
    static const char digits[] = "0123456789ABCDEF";
    const unsigned int high = c >> 4;
    const unsigned int low = c & 0x0F;

    hexBuf[0] = digits[high];
    hexBuf[1] = digits[low];
}
46
47 void
48 yajl_string_encode(yajl_buf buf, const unsigned char * str,
49 unsigned int len)
50 {
51 unsigned int beg = 0;
52 unsigned int end = 0;
53 char hexBuf[7];
54 hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
55 hexBuf[6] = 0;
56
57 while (end < len) {
58 const char * escaped = NULL;
59 switch (str[end]) {
60 case '\r': escaped = "\\r"; break;
61 case '\n': escaped = "\\n"; break;
62 case '\\': escaped = "\\\\"; break;
63 /* case '/': escaped = "\\/"; break; */
64 case '"': escaped = "\\\""; break;
65 case '\f': escaped = "\\f"; break;
66 case '\b': escaped = "\\b"; break;
67 case '\t': escaped = "\\t"; break;
68 default:
69 if ((unsigned char) str[end] < 32) {
70 CharToHex(str[end], hexBuf + 4);
71 escaped = hexBuf;
72 }
73 break;
74 }
75 if (escaped != NULL) {
76 yajl_buf_append(buf, str + beg, end - beg);
77 yajl_buf_append(buf, escaped, strlen(escaped));
78 beg = ++end;
79 } else {
80 ++end;
81 }
82 }
83 yajl_buf_append(buf, str + beg, end - beg);
84 }
85
/*
 * Read exactly four hexadecimal characters starting at `hex` and shift
 * them, most significant first, into *val.  *val is not cleared first:
 * the new nibbles are appended below whatever bits it already holds.
 *
 * Both upper- and lower-case letters are accepted.  The input is not
 * validated beyond an assert that each character reduced to a value
 * that fits in four bits.
 */
static void hexToDigit(unsigned int * val, const unsigned char * hex)
{
    const unsigned char * const stop = hex + 4;
    unsigned int acc = *val;

    while (hex != stop) {
        unsigned char nibble = *hex++;

        if (nibble >= 'A') {
            /* Fold lower case onto upper case, then close the gap
             * between '9' and 'A' so 'A' lands right after '9'. */
            nibble = (unsigned char) ((nibble & ~0x20) - 7);
        }
        nibble = (unsigned char) (nibble - '0');
        assert(!(nibble & 0xF0));
        acc = (acc << 4) | nibble;
    }

    *val = acc;
}
97
/*
 * Encode one Unicode code point as a NUL-terminated UTF-8 sequence.
 *
 * codepoint: the value to encode.
 * utf8Buf:   output buffer; it must have room for 5 bytes (up to four
 *            encoded bytes plus the terminating NUL).
 *
 * Values that are not Unicode scalar values -- the UTF-16 surrogate
 * range U+D800..U+DFFF and anything above U+10FFFF -- have no valid
 * UTF-8 form, so a single '?' is written in their place.
 *
 * Code point 0 produces an empty C string: the encoded byte is itself
 * the terminator, so callers relying on strlen() will see length 0.
 */
static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
{
    if (codepoint < 0x80) {
        /* 1 byte: 0xxxxxxx */
        utf8Buf[0] = (char) codepoint;
        utf8Buf[1] = 0;
    } else if (codepoint < 0x0800) {
        /* 2 bytes: 110xxxxx 10xxxxxx */
        utf8Buf[0] = (char) ((codepoint >> 6) | 0xC0);
        utf8Buf[1] = (char) ((codepoint & 0x3F) | 0x80);
        utf8Buf[2] = 0;
    } else if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
        /* Surrogates are UTF-16 artifacts and must not appear in UTF-8. */
        utf8Buf[0] = '?';
        utf8Buf[1] = 0;
    } else if (codepoint < 0x10000) {
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        utf8Buf[0] = (char) ((codepoint >> 12) | 0xE0);
        utf8Buf[1] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
        utf8Buf[2] = (char) ((codepoint & 0x3F) | 0x80);
        utf8Buf[3] = 0;
    } else if (codepoint <= 0x10FFFF) {
        /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        utf8Buf[0] = (char) ((codepoint >> 18) | 0xF0);
        utf8Buf[1] = (char) (((codepoint >> 12) & 0x3F) | 0x80);
        utf8Buf[2] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
        utf8Buf[3] = (char) ((codepoint & 0x3F) | 0x80);
        utf8Buf[4] = 0;
    } else {
        /* Beyond the Unicode code space. */
        utf8Buf[0] = '?';
        utf8Buf[1] = 0;
    }
}
123
124 void yajl_string_decode(yajl_buf buf, const unsigned char * str,
125 unsigned int len)
126 {
127 unsigned int beg = 0;
128 unsigned int end = 0;
129
130 while (end < len) {
131 if (str[end] == '\\') {
132 char utf8Buf[5];
133 const char * unescaped = "?";
134 yajl_buf_append(buf, str + beg, end - beg);
135 switch (str[++end]) {
136 case 'r': unescaped = "\r"; break;
137 case 'n': unescaped = "\n"; break;
138 case '\\': unescaped = "\\"; break;
139 case '/': unescaped = "/"; break;
140 case '"': unescaped = "\""; break;
141 case 'f': unescaped = "\f"; break;
142 case 'b': unescaped = "\b"; break;
143 case 't': unescaped = "\t"; break;
144 case 'u': {
145 unsigned int codepoint = 0;
146 hexToDigit(&codepoint, str + ++end);
147 end+=3;
148 /* check if this is a surrogate */
149 if ((codepoint & 0xFC00) == 0xD800) {
150 end++;
151 if (str[end] == '\\' && str[end + 1] == 'u') {
152 unsigned int surrogate = 0;
153 hexToDigit(&surrogate, str + end + 2);
154 codepoint =
155 (((codepoint & 0x3F) << 10) |
156 ((((codepoint >> 6) & 0xF) + 1) << 16) |
157 (surrogate & 0x3FF));
158 end += 5;
159 } else {
160 unescaped = "?";
161 break;
162 }
163 }
164
165 Utf32toUtf8(codepoint, utf8Buf);
166 unescaped = utf8Buf;
167 break;
168 }
169 default:
170 assert("this should never happen" == NULL);
171 }
172 yajl_buf_append(buf, unescaped, strlen(unescaped));
173 beg = ++end;
174 } else {
175 end++;
176 }
177 }
178 yajl_buf_append(buf, str + beg, end - beg);
179 }
This page took 0.037732 seconds and 4 git commands to generate.