OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
#include "platform/JSONParser.h"

#include "platform/Decimal.h"
#include "platform/JSONValues.h"
#include "wtf/text/StringBuilder.h"
#include "wtf/text/StringToNumber.h"
#include <limits>
11 | |
12 namespace blink { | |
13 | |
14 namespace { | |
15 | |
// Deepest array/object nesting level buildValue() accepts; anything nested
// more deeply makes the parse fail.
constexpr int stackLimit = 1000;
17 | |
// Lexical token kinds produced by parseToken().
enum Token {
    ObjectBegin = 0, // '{'
    ObjectEnd = 1, // '}'
    ArrayBegin = 2, // '['
    ArrayEnd = 3, // ']'
    StringLiteral = 4, // Double-quoted string.
    Number = 5, // Numeric literal.
    BoolTrue = 6, // The literal "true".
    BoolFalse = 7, // The literal "false".
    NullToken = 8, // The literal "null".
    ListSeparator = 9, // ','
    ObjectPairSeparator = 10, // ':'
    InvalidToken = 11, // End of input or unrecognized text.
};
32 | |
// Spellings of the keyword literals that parseToken() matches through
// parseConstToken().
const char* const trueString = "true";
const char* const falseString = "false";
const char* const nullString = "null";
36 | |
// Checks whether the input range [start, end) begins with the NUL-terminated
// literal |token|.
//
// On success returns true and stores the position just past the matched
// literal in |*tokenEnd|. On failure returns false and leaves |*tokenEnd|
// untouched.
template<typename CharType>
bool parseConstToken(const CharType* start, const CharType* end, const CharType** tokenEnd, const char* token)
{
    // Advance only past characters that actually match. Incrementing inside
    // the comparison would also step over a mismatching final character and
    // leave |token| on its terminator, making e.g. "nulx" look like "null".
    while (start < end && *token != '\0' && *start == *token) {
        ++start;
        ++token;
    }
    // Anything left in |token| means the input ended early or differed.
    if (*token != '\0')
        return false;
    *tokenEnd = start;
    return true;
}
46 | |
// Consumes a non-empty run of ASCII decimal digits at the front of
// [start, end).
//
// Returns false when there is no digit at |start|, or when
// |canHaveLeadingZeros| is false and a multi-digit run begins with '0'.
// On success stores the position after the last digit in |*tokenEnd|.
template<typename CharType>
bool readInt(const CharType* start, const CharType* end, const CharType** tokenEnd, bool canHaveLeadingZeros)
{
    if (start == end)
        return false;

    const CharType* cursor = start;
    while (cursor < end && *cursor >= '0' && *cursor <= '9')
        ++cursor;

    // No digits consumed at all.
    if (cursor == start)
        return false;

    const bool startsWithZero = *start == '0';
    const bool hasSeveralDigits = cursor - start > 1;
    if (startsWithZero && hasSeveralDigits && !canHaveLeadingZeros)
        return false;

    *tokenEnd = cursor;
    return true;
}
65 | |
// Finds the extent of a number at the front of [start, end). Only the shape
// is checked here; conversion to a numeric value is done by the caller.
// Accepted shape (RFC 4627): [minus] int [frac] [exp].
//
// On success returns true and stores the position after the number in
// |*tokenEnd|; otherwise returns false.
template<typename CharType>
bool parseNumberToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    if (start == end)
        return false;

    const CharType* cursor = start;

    // Optional minus sign.
    if (*cursor == '-')
        ++cursor;

    // Mandatory integer part; leading zeros are rejected.
    if (!readInt(cursor, end, &cursor, false))
        return false;

    // Optional fraction part.
    if (cursor < end && *cursor == '.') {
        ++cursor;
        if (!readInt(cursor, end, &cursor, true))
            return false;
    }

    // Optional exponent part.
    if (cursor < end && (*cursor == 'e' || *cursor == 'E')) {
        ++cursor;
        if (cursor == end)
            return false;
        if (*cursor == '-' || *cursor == '+') {
            ++cursor;
            if (cursor == end)
                return false;
        }
        if (!readInt(cursor, end, &cursor, true))
            return false;
    }

    *tokenEnd = cursor;
    return true;
}
115 | |
// Requires exactly |digits| hexadecimal characters (0-9, a-f, A-F) at the
// front of [start, end).
//
// Returns false if fewer than |digits| characters are available or any of
// them is not a hex digit. On success stores the position after the last
// digit in |*tokenEnd|.
template<typename CharType>
bool readHexDigits(const CharType* start, const CharType* end, const CharType** tokenEnd, int digits)
{
    if (end - start < digits)
        return false;

    int remaining = digits;
    while (remaining-- > 0) {
        const CharType c = *start++;
        const bool isDecimal = c >= '0' && c <= '9';
        const bool isLowerHex = c >= 'a' && c <= 'f';
        const bool isUpperHex = c >= 'A' && c <= 'F';
        if (!isDecimal && !isLowerHex && !isUpperHex)
            return false;
    }

    *tokenEnd = start;
    return true;
}
129 | |
// Scans the body of a string literal. |start| must point just past the
// opening double quote; scanning stops at the first unescaped closing quote.
//
// Every backslash escape is checked for a recognized escape character, and
// \x / \u must be followed by 2 / 4 hex digits respectively.
//
// On success returns true and stores the position just past the closing
// quote in |*tokenEnd|. Returns false for an invalid escape or when the
// input ends before the closing quote.
template<typename CharType>
bool parseStringToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    while (start < end) {
        CharType c = *start++;
        if ('\\' == c) {
            // A backslash as the last input character has nothing to escape;
            // reading on would access memory past |end|.
            if (start == end)
                return false;
            c = *start++;
            // Make sure the escaped char is valid.
            switch (c) {
            case 'x':
                if (!readHexDigits(start, end, &start, 2))
                    return false;
                break;
            case 'u':
                if (!readHexDigits(start, end, &start, 4))
                    return false;
                break;
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case '"':
                break;
            default:
                return false;
            }
        } else if ('"' == c) {
            *tokenEnd = start;
            return true;
        }
    }
    // Ran out of input without seeing the closing quote.
    return false;
}
167 | |
// Skips one comment at the front of [start, end).
//
// Two forms are recognized:
//   - "//" up to and including the first '\n' or '\r', or up to end of input;
//   - "/*" up to and including the matching "*/", which must be present.
//
// Returns true and stores the position after the comment in |*commentEnd|
// on success. Returns false if the input does not start with a comment or a
// block comment is left unterminated.
template<typename CharType>
bool skipComment(const CharType* start, const CharType* end, const CharType** commentEnd)
{
    // A comment opener needs at least two characters, the first being '/'.
    if (end - start < 2 || *start != '/')
        return false;

    const CharType opener = start[1];
    const CharType* cursor = start + 2;

    if (opener == '/') {
        // Line comment: consume through the first line terminator.
        while (cursor < end) {
            const CharType current = *cursor++;
            if (current == '\n' || current == '\r') {
                *commentEnd = cursor;
                return true;
            }
        }
        // Reaching end-of-input inside a line comment is fine.
        *commentEnd = end;
        return true;
    }

    if (opener == '*') {
        // Block comment: look for '*' immediately followed by '/'. The '*'
        // of the opener itself does not count, so "/*/" is not closed.
        CharType previous = '\0';
        while (cursor < end) {
            const CharType current = *cursor++;
            if (previous == '*' && current == '/') {
                *commentEnd = cursor;
                return true;
            }
            previous = current;
        }
        // Block comment must close before end-of-input.
        return false;
    }

    return false;
}
206 | |
// Advances past any mix of whitespace and comments at the front of
// [start, end) and stores the first position that is neither in
// |*whitespaceEnd|. A '/' that does not start a well-formed comment stops
// the scan and is left in place.
template<typename CharType>
void skipWhitespaceAndComments(const CharType* start, const CharType* end, const CharType** whitespaceEnd)
{
    const CharType* cursor = start;
    while (cursor < end) {
        if (isSpaceOrNewline(*cursor)) {
            ++cursor;
            continue;
        }
        if (*cursor != '/')
            break;

        const CharType* afterComment;
        if (!skipComment(cursor, end, &afterComment))
            break;
        cursor = afterComment;
    }
    *whitespaceEnd = cursor;
}
224 | |
225 template<typename CharType> | |
226 Token parseToken(const CharType* start, const CharType* end, const CharType** to
kenStart, const CharType** tokenEnd) | |
227 { | |
228 skipWhitespaceAndComments(start, end, tokenStart); | |
229 start = *tokenStart; | |
230 | |
231 if (start == end) | |
232 return InvalidToken; | |
233 | |
234 switch (*start) { | |
235 case 'n': | |
236 if (parseConstToken(start, end, tokenEnd, nullString)) | |
237 return NullToken; | |
238 break; | |
239 case 't': | |
240 if (parseConstToken(start, end, tokenEnd, trueString)) | |
241 return BoolTrue; | |
242 break; | |
243 case 'f': | |
244 if (parseConstToken(start, end, tokenEnd, falseString)) | |
245 return BoolFalse; | |
246 break; | |
247 case '[': | |
248 *tokenEnd = start + 1; | |
249 return ArrayBegin; | |
250 case ']': | |
251 *tokenEnd = start + 1; | |
252 return ArrayEnd; | |
253 case ',': | |
254 *tokenEnd = start + 1; | |
255 return ListSeparator; | |
256 case '{': | |
257 *tokenEnd = start + 1; | |
258 return ObjectBegin; | |
259 case '}': | |
260 *tokenEnd = start + 1; | |
261 return ObjectEnd; | |
262 case ':': | |
263 *tokenEnd = start + 1; | |
264 return ObjectPairSeparator; | |
265 case '0': | |
266 case '1': | |
267 case '2': | |
268 case '3': | |
269 case '4': | |
270 case '5': | |
271 case '6': | |
272 case '7': | |
273 case '8': | |
274 case '9': | |
275 case '-': | |
276 if (parseNumberToken(start, end, tokenEnd)) | |
277 return Number; | |
278 break; | |
279 case '"': | |
280 if (parseStringToken(start + 1, end, tokenEnd)) | |
281 return StringLiteral; | |
282 break; | |
283 } | |
284 return InvalidToken; | |
285 } | |
286 | |
287 template<typename CharType> | |
288 inline int hexToInt(CharType c) | |
289 { | |
290 if ('0' <= c && c <= '9') | |
291 return c - '0'; | |
292 if ('A' <= c && c <= 'F') | |
293 return c - 'A' + 10; | |
294 if ('a' <= c && c <= 'f') | |
295 return c - 'a' + 10; | |
296 NOTREACHED(); | |
297 return 0; | |
298 } | |
299 | |
300 template<typename CharType> | |
301 bool decodeString(const CharType* start, const CharType* end, StringBuilder* out
put) | |
302 { | |
303 while (start < end) { | |
304 UChar c = *start++; | |
305 if ('\\' != c) { | |
306 output->append(c); | |
307 continue; | |
308 } | |
309 c = *start++; | |
310 | |
311 if (c == 'x') { | |
312 // \x is not supported. | |
313 return false; | |
314 } | |
315 | |
316 switch (c) { | |
317 case '"': | |
318 case '/': | |
319 case '\\': | |
320 break; | |
321 case 'b': | |
322 c = '\b'; | |
323 break; | |
324 case 'f': | |
325 c = '\f'; | |
326 break; | |
327 case 'n': | |
328 c = '\n'; | |
329 break; | |
330 case 'r': | |
331 c = '\r'; | |
332 break; | |
333 case 't': | |
334 c = '\t'; | |
335 break; | |
336 case 'v': | |
337 c = '\v'; | |
338 break; | |
339 case 'u': | |
340 c = (hexToInt(*start) << 12) + | |
341 (hexToInt(*(start + 1)) << 8) + | |
342 (hexToInt(*(start + 2)) << 4) + | |
343 hexToInt(*(start + 3)); | |
344 start += 4; | |
345 break; | |
346 default: | |
347 return false; | |
348 } | |
349 output->append(c); | |
350 } | |
351 return true; | |
352 } | |
353 | |
354 template<typename CharType> | |
355 bool decodeString(const CharType* start, const CharType* end, String* output) | |
356 { | |
357 if (start == end) { | |
358 *output = ""; | |
359 return true; | |
360 } | |
361 if (start > end) | |
362 return false; | |
363 StringBuilder buffer; | |
364 buffer.reserveCapacity(end - start); | |
365 if (!decodeString(start, end, &buffer)) | |
366 return false; | |
367 *output = buffer.toString(); | |
368 // Validate constructed utf16 string. | |
369 if (output->utf8(StrictUTF8Conversion).isNull()) | |
370 return false; | |
371 return true; | |
372 } | |
373 | |
374 template<typename CharType> | |
375 std::unique_ptr<JSONValue> buildValue(const CharType* start, const CharType* end
, const CharType** valueTokenEnd, int depth) | |
376 { | |
377 if (depth > stackLimit) | |
378 return nullptr; | |
379 | |
380 std::unique_ptr<JSONValue> result; | |
381 const CharType* tokenStart; | |
382 const CharType* tokenEnd; | |
383 Token token = parseToken(start, end, &tokenStart, &tokenEnd); | |
384 switch (token) { | |
385 case InvalidToken: | |
386 return nullptr; | |
387 case NullToken: | |
388 result = JSONValue::null(); | |
389 break; | |
390 case BoolTrue: | |
391 result = JSONBasicValue::create(true); | |
392 break; | |
393 case BoolFalse: | |
394 result = JSONBasicValue::create(false); | |
395 break; | |
396 case Number: { | |
397 bool ok; | |
398 double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok
); | |
399 if (Decimal::fromDouble(value).isInfinity()) | |
400 ok = false; | |
401 if (!ok) | |
402 return nullptr; | |
403 int number = static_cast<int>(value); | |
404 if (number == value) | |
405 result = JSONBasicValue::create(number); | |
406 else | |
407 result = JSONBasicValue::create(value); | |
408 break; | |
409 } | |
410 case StringLiteral: { | |
411 String value; | |
412 bool ok = decodeString(tokenStart + 1, tokenEnd - 1, &value); | |
413 if (!ok) | |
414 return nullptr; | |
415 result = JSONString::create(value); | |
416 break; | |
417 } | |
418 case ArrayBegin: { | |
419 std::unique_ptr<JSONArray> array = JSONArray::create(); | |
420 start = tokenEnd; | |
421 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
422 while (token != ArrayEnd) { | |
423 std::unique_ptr<JSONValue> arrayNode = buildValue(start, end, &token
End, depth + 1); | |
424 if (!arrayNode) | |
425 return nullptr; | |
426 array->pushValue(std::move(arrayNode)); | |
427 | |
428 // After a list value, we expect a comma or the end of the list. | |
429 start = tokenEnd; | |
430 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
431 if (token == ListSeparator) { | |
432 start = tokenEnd; | |
433 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
434 if (token == ArrayEnd) | |
435 return nullptr; | |
436 } else if (token != ArrayEnd) { | |
437 // Unexpected value after list value. Bail out. | |
438 return nullptr; | |
439 } | |
440 } | |
441 if (token != ArrayEnd) | |
442 return nullptr; | |
443 result = std::move(array); | |
444 break; | |
445 } | |
446 case ObjectBegin: { | |
447 std::unique_ptr<JSONObject> object = JSONObject::create(); | |
448 start = tokenEnd; | |
449 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
450 while (token != ObjectEnd) { | |
451 if (token != StringLiteral) | |
452 return nullptr; | |
453 String key; | |
454 if (!decodeString(tokenStart + 1, tokenEnd - 1, &key)) | |
455 return nullptr; | |
456 start = tokenEnd; | |
457 | |
458 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
459 if (token != ObjectPairSeparator) | |
460 return nullptr; | |
461 start = tokenEnd; | |
462 | |
463 std::unique_ptr<JSONValue> value = buildValue(start, end, &tokenEnd,
depth + 1); | |
464 if (!value) | |
465 return nullptr; | |
466 object->setValue(key, std::move(value)); | |
467 start = tokenEnd; | |
468 | |
469 // After a key/value pair, we expect a comma or the end of the | |
470 // object. | |
471 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
472 if (token == ListSeparator) { | |
473 start = tokenEnd; | |
474 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
475 if (token == ObjectEnd) | |
476 return nullptr; | |
477 } else if (token != ObjectEnd) { | |
478 // Unexpected value after last object value. Bail out. | |
479 return nullptr; | |
480 } | |
481 } | |
482 if (token != ObjectEnd) | |
483 return nullptr; | |
484 result = std::move(object); | |
485 break; | |
486 } | |
487 | |
488 default: | |
489 // We got a token that's not a value. | |
490 return nullptr; | |
491 } | |
492 | |
493 skipWhitespaceAndComments(tokenEnd, end, valueTokenEnd); | |
494 return result; | |
495 } | |
496 | |
497 template<typename CharType> | |
498 std::unique_ptr<JSONValue> parseJSONInternal(const CharType* start, unsigned len
gth) | |
499 { | |
500 const CharType* end = start + length; | |
501 const CharType *tokenEnd; | |
502 std::unique_ptr<JSONValue> value = buildValue(start, end, &tokenEnd, 0); | |
503 if (!value || tokenEnd != end) | |
504 return nullptr; | |
505 return value; | |
506 } | |
507 | |
508 } // anonymous namespace | |
509 | |
510 std::unique_ptr<JSONValue> parseJSON(const String& json) | |
511 { | |
512 if (json.isEmpty()) | |
513 return nullptr; | |
514 if (json.is8Bit()) | |
515 return parseJSONInternal(json.characters8(), json.length()); | |
516 return parseJSONInternal(json.characters16(), json.length()); | |
517 } | |
518 | |
519 } // namespace blink | |
OLD | NEW |