OLD | NEW |
| (Empty) |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/json_reader.h" | |
6 | |
7 #include "base/float_util.h" | |
8 #include "base/logging.h" | |
9 #include "base/scoped_ptr.h" | |
10 #include "base/string_util.h" | |
11 #include "base/utf_string_conversions.h" | |
12 #include "base/values.h" | |
13 | |
14 static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN, | |
15 0, 0); | |
16 static const int kStackLimit = 100; | |
17 | |
18 namespace { | |
19 | |
20 inline int HexToInt(wchar_t c) { | |
21 if ('0' <= c && c <= '9') { | |
22 return c - '0'; | |
23 } else if ('A' <= c && c <= 'F') { | |
24 return c - 'A' + 10; | |
25 } else if ('a' <= c && c <= 'f') { | |
26 return c - 'a' + 10; | |
27 } | |
28 NOTREACHED(); | |
29 return 0; | |
30 } | |
31 | |
32 // A helper method for ParseNumberToken. It reads an int from the end of | |
33 // token. The method returns false if there is no valid integer at the end of | |
34 // the token. | |
35 bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) { | |
36 wchar_t first = token.NextChar(); | |
37 int len = 0; | |
38 | |
39 // Read in more digits | |
40 wchar_t c = first; | |
41 while ('\0' != c && '0' <= c && c <= '9') { | |
42 ++token.length; | |
43 ++len; | |
44 c = token.NextChar(); | |
45 } | |
46 // We need at least 1 digit. | |
47 if (len == 0) | |
48 return false; | |
49 | |
50 if (!can_have_leading_zeros && len > 1 && '0' == first) | |
51 return false; | |
52 | |
53 return true; | |
54 } | |
55 | |
56 // A helper method for ParseStringToken. It reads |digits| hex digits from the | |
57 // token. If the sequence if digits is not valid (contains other characters), | |
58 // the method returns false. | |
59 bool ReadHexDigits(JSONReader::Token& token, int digits) { | |
60 for (int i = 1; i <= digits; ++i) { | |
61 wchar_t c = *(token.begin + token.length + i); | |
62 if ('\0' == c) | |
63 return false; | |
64 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || | |
65 ('A' <= c && c <= 'F'))) { | |
66 return false; | |
67 } | |
68 } | |
69 | |
70 token.length += digits; | |
71 return true; | |
72 } | |
73 | |
74 } // anonymous namespace | |
75 | |
76 const char* JSONReader::kBadRootElementType = | |
77 "Root value must be an array or object."; | |
78 const char* JSONReader::kInvalidEscape = | |
79 "Invalid escape sequence."; | |
80 const char* JSONReader::kSyntaxError = | |
81 "Syntax error."; | |
82 const char* JSONReader::kTrailingComma = | |
83 "Trailing comma not allowed."; | |
84 const char* JSONReader::kTooMuchNesting = | |
85 "Too much nesting."; | |
86 const char* JSONReader::kUnexpectedDataAfterRoot = | |
87 "Unexpected data after root element."; | |
88 const char* JSONReader::kUnsupportedEncoding = | |
89 "Unsupported encoding. JSON must be UTF-8."; | |
90 const char* JSONReader::kUnquotedDictionaryKey = | |
91 "Dictionary keys must be quoted."; | |
92 | |
93 /* static */ | |
94 Value* JSONReader::Read(const std::string& json, | |
95 bool allow_trailing_comma) { | |
96 return ReadAndReturnError(json, allow_trailing_comma, NULL); | |
97 } | |
98 | |
99 /* static */ | |
100 Value* JSONReader::ReadAndReturnError(const std::string& json, | |
101 bool allow_trailing_comma, | |
102 std::string *error_message_out) { | |
103 JSONReader reader = JSONReader(); | |
104 Value* root = reader.JsonToValue(json, true, allow_trailing_comma); | |
105 if (root) | |
106 return root; | |
107 | |
108 if (error_message_out) | |
109 *error_message_out = reader.error_message(); | |
110 | |
111 return NULL; | |
112 } | |
113 | |
114 /* static */ | |
115 std::string JSONReader::FormatErrorMessage(int line, int column, | |
116 const char* description) { | |
117 return StringPrintf("Line: %i, column: %i, %s", | |
118 line, column, description); | |
119 } | |
120 | |
121 JSONReader::JSONReader() | |
122 : start_pos_(NULL), json_pos_(NULL), stack_depth_(0), | |
123 allow_trailing_comma_(false) {} | |
124 | |
125 Value* JSONReader::JsonToValue(const std::string& json, bool check_root, | |
126 bool allow_trailing_comma) { | |
127 // The input must be in UTF-8. | |
128 if (!IsStringUTF8(json.c_str())) { | |
129 error_message_ = kUnsupportedEncoding; | |
130 return NULL; | |
131 } | |
132 | |
133 // The conversion from UTF8 to wstring removes null bytes for us | |
134 // (a good thing). | |
135 std::wstring json_wide(UTF8ToWide(json)); | |
136 start_pos_ = json_wide.c_str(); | |
137 | |
138 // When the input JSON string starts with a UTF-8 Byte-Order-Mark | |
139 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode | |
140 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from | |
141 // mis-treating a Unicode BOM as an invalid character and returning NULL, | |
142 // skip a converted Unicode BOM if it exists. | |
143 if (!json_wide.empty() && start_pos_[0] == 0xFEFF) { | |
144 ++start_pos_; | |
145 } | |
146 | |
147 json_pos_ = start_pos_; | |
148 allow_trailing_comma_ = allow_trailing_comma; | |
149 stack_depth_ = 0; | |
150 error_message_.clear(); | |
151 | |
152 scoped_ptr<Value> root(BuildValue(check_root)); | |
153 if (root.get()) { | |
154 if (ParseToken().type == Token::END_OF_INPUT) { | |
155 return root.release(); | |
156 } else { | |
157 SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_); | |
158 } | |
159 } | |
160 | |
161 // Default to calling errors "syntax errors". | |
162 if (error_message_.empty()) | |
163 SetErrorMessage(kSyntaxError, json_pos_); | |
164 | |
165 return NULL; | |
166 } | |
167 | |
168 Value* JSONReader::BuildValue(bool is_root) { | |
169 ++stack_depth_; | |
170 if (stack_depth_ > kStackLimit) { | |
171 SetErrorMessage(kTooMuchNesting, json_pos_); | |
172 return NULL; | |
173 } | |
174 | |
175 Token token = ParseToken(); | |
176 // The root token must be an array or an object. | |
177 if (is_root && token.type != Token::OBJECT_BEGIN && | |
178 token.type != Token::ARRAY_BEGIN) { | |
179 SetErrorMessage(kBadRootElementType, json_pos_); | |
180 return NULL; | |
181 } | |
182 | |
183 scoped_ptr<Value> node; | |
184 | |
185 switch (token.type) { | |
186 case Token::END_OF_INPUT: | |
187 case Token::INVALID_TOKEN: | |
188 return NULL; | |
189 | |
190 case Token::NULL_TOKEN: | |
191 node.reset(Value::CreateNullValue()); | |
192 break; | |
193 | |
194 case Token::BOOL_TRUE: | |
195 node.reset(Value::CreateBooleanValue(true)); | |
196 break; | |
197 | |
198 case Token::BOOL_FALSE: | |
199 node.reset(Value::CreateBooleanValue(false)); | |
200 break; | |
201 | |
202 case Token::NUMBER: | |
203 node.reset(DecodeNumber(token)); | |
204 if (!node.get()) | |
205 return NULL; | |
206 break; | |
207 | |
208 case Token::STRING: | |
209 node.reset(DecodeString(token)); | |
210 if (!node.get()) | |
211 return NULL; | |
212 break; | |
213 | |
214 case Token::ARRAY_BEGIN: | |
215 { | |
216 json_pos_ += token.length; | |
217 token = ParseToken(); | |
218 | |
219 node.reset(new ListValue()); | |
220 while (token.type != Token::ARRAY_END) { | |
221 Value* array_node = BuildValue(false); | |
222 if (!array_node) | |
223 return NULL; | |
224 static_cast<ListValue*>(node.get())->Append(array_node); | |
225 | |
226 // After a list value, we expect a comma or the end of the list. | |
227 token = ParseToken(); | |
228 if (token.type == Token::LIST_SEPARATOR) { | |
229 json_pos_ += token.length; | |
230 token = ParseToken(); | |
231 // Trailing commas are invalid according to the JSON RFC, but some | |
232 // consumers need the parsing leniency, so handle accordingly. | |
233 if (token.type == Token::ARRAY_END) { | |
234 if (!allow_trailing_comma_) { | |
235 SetErrorMessage(kTrailingComma, json_pos_); | |
236 return NULL; | |
237 } | |
238 // Trailing comma OK, stop parsing the Array. | |
239 break; | |
240 } | |
241 } else if (token.type != Token::ARRAY_END) { | |
242 // Unexpected value after list value. Bail out. | |
243 return NULL; | |
244 } | |
245 } | |
246 if (token.type != Token::ARRAY_END) { | |
247 return NULL; | |
248 } | |
249 break; | |
250 } | |
251 | |
252 case Token::OBJECT_BEGIN: | |
253 { | |
254 json_pos_ += token.length; | |
255 token = ParseToken(); | |
256 | |
257 node.reset(new DictionaryValue); | |
258 while (token.type != Token::OBJECT_END) { | |
259 if (token.type != Token::STRING) { | |
260 SetErrorMessage(kUnquotedDictionaryKey, json_pos_); | |
261 return NULL; | |
262 } | |
263 scoped_ptr<Value> dict_key_value(DecodeString(token)); | |
264 if (!dict_key_value.get()) | |
265 return NULL; | |
266 | |
267 // Convert the key into a wstring. | |
268 std::wstring dict_key; | |
269 bool success = dict_key_value->GetAsString(&dict_key); | |
270 DCHECK(success); | |
271 | |
272 json_pos_ += token.length; | |
273 token = ParseToken(); | |
274 if (token.type != Token::OBJECT_PAIR_SEPARATOR) | |
275 return NULL; | |
276 | |
277 json_pos_ += token.length; | |
278 token = ParseToken(); | |
279 Value* dict_value = BuildValue(false); | |
280 if (!dict_value) | |
281 return NULL; | |
282 static_cast<DictionaryValue*>(node.get())->Set(dict_key, dict_value); | |
283 | |
284 // After a key/value pair, we expect a comma or the end of the | |
285 // object. | |
286 token = ParseToken(); | |
287 if (token.type == Token::LIST_SEPARATOR) { | |
288 json_pos_ += token.length; | |
289 token = ParseToken(); | |
290 // Trailing commas are invalid according to the JSON RFC, but some | |
291 // consumers need the parsing leniency, so handle accordingly. | |
292 if (token.type == Token::OBJECT_END) { | |
293 if (!allow_trailing_comma_) { | |
294 SetErrorMessage(kTrailingComma, json_pos_); | |
295 return NULL; | |
296 } | |
297 // Trailing comma OK, stop parsing the Object. | |
298 break; | |
299 } | |
300 } else if (token.type != Token::OBJECT_END) { | |
301 // Unexpected value after last object value. Bail out. | |
302 return NULL; | |
303 } | |
304 } | |
305 if (token.type != Token::OBJECT_END) | |
306 return NULL; | |
307 | |
308 break; | |
309 } | |
310 | |
311 default: | |
312 // We got a token that's not a value. | |
313 return NULL; | |
314 } | |
315 json_pos_ += token.length; | |
316 | |
317 --stack_depth_; | |
318 return node.release(); | |
319 } | |
320 | |
321 JSONReader::Token JSONReader::ParseNumberToken() { | |
322 // We just grab the number here. We validate the size in DecodeNumber. | |
323 // According to RFC4627, a valid number is: [minus] int [frac] [exp] | |
324 Token token(Token::NUMBER, json_pos_, 0); | |
325 wchar_t c = *json_pos_; | |
326 if ('-' == c) { | |
327 ++token.length; | |
328 c = token.NextChar(); | |
329 } | |
330 | |
331 if (!ReadInt(token, false)) | |
332 return kInvalidToken; | |
333 | |
334 // Optional fraction part | |
335 c = token.NextChar(); | |
336 if ('.' == c) { | |
337 ++token.length; | |
338 if (!ReadInt(token, true)) | |
339 return kInvalidToken; | |
340 c = token.NextChar(); | |
341 } | |
342 | |
343 // Optional exponent part | |
344 if ('e' == c || 'E' == c) { | |
345 ++token.length; | |
346 c = token.NextChar(); | |
347 if ('-' == c || '+' == c) { | |
348 ++token.length; | |
349 c = token.NextChar(); | |
350 } | |
351 if (!ReadInt(token, true)) | |
352 return kInvalidToken; | |
353 } | |
354 | |
355 return token; | |
356 } | |
357 | |
358 Value* JSONReader::DecodeNumber(const Token& token) { | |
359 const std::wstring num_string(token.begin, token.length); | |
360 | |
361 int num_int; | |
362 if (StringToInt(WideToUTF16Hack(num_string), &num_int)) | |
363 return Value::CreateIntegerValue(num_int); | |
364 | |
365 double num_double; | |
366 if (StringToDouble(WideToUTF16Hack(num_string), &num_double) && | |
367 base::IsFinite(num_double)) | |
368 return Value::CreateRealValue(num_double); | |
369 | |
370 return NULL; | |
371 } | |
372 | |
373 JSONReader::Token JSONReader::ParseStringToken() { | |
374 Token token(Token::STRING, json_pos_, 1); | |
375 wchar_t c = token.NextChar(); | |
376 while ('\0' != c) { | |
377 if ('\\' == c) { | |
378 ++token.length; | |
379 c = token.NextChar(); | |
380 // Make sure the escaped char is valid. | |
381 switch (c) { | |
382 case 'x': | |
383 if (!ReadHexDigits(token, 2)) { | |
384 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); | |
385 return kInvalidToken; | |
386 } | |
387 break; | |
388 case 'u': | |
389 if (!ReadHexDigits(token, 4)) { | |
390 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); | |
391 return kInvalidToken; | |
392 } | |
393 break; | |
394 case '\\': | |
395 case '/': | |
396 case 'b': | |
397 case 'f': | |
398 case 'n': | |
399 case 'r': | |
400 case 't': | |
401 case 'v': | |
402 case '"': | |
403 break; | |
404 default: | |
405 SetErrorMessage(kInvalidEscape, json_pos_ + token.length); | |
406 return kInvalidToken; | |
407 } | |
408 } else if ('"' == c) { | |
409 ++token.length; | |
410 return token; | |
411 } | |
412 ++token.length; | |
413 c = token.NextChar(); | |
414 } | |
415 return kInvalidToken; | |
416 } | |
417 | |
418 Value* JSONReader::DecodeString(const Token& token) { | |
419 std::wstring decoded_str; | |
420 decoded_str.reserve(token.length - 2); | |
421 | |
422 for (int i = 1; i < token.length - 1; ++i) { | |
423 wchar_t c = *(token.begin + i); | |
424 if ('\\' == c) { | |
425 ++i; | |
426 c = *(token.begin + i); | |
427 switch (c) { | |
428 case '"': | |
429 case '/': | |
430 case '\\': | |
431 decoded_str.push_back(c); | |
432 break; | |
433 case 'b': | |
434 decoded_str.push_back('\b'); | |
435 break; | |
436 case 'f': | |
437 decoded_str.push_back('\f'); | |
438 break; | |
439 case 'n': | |
440 decoded_str.push_back('\n'); | |
441 break; | |
442 case 'r': | |
443 decoded_str.push_back('\r'); | |
444 break; | |
445 case 't': | |
446 decoded_str.push_back('\t'); | |
447 break; | |
448 case 'v': | |
449 decoded_str.push_back('\v'); | |
450 break; | |
451 | |
452 case 'x': | |
453 decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) + | |
454 HexToInt(*(token.begin + i + 2))); | |
455 i += 2; | |
456 break; | |
457 case 'u': | |
458 decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) + | |
459 (HexToInt(*(token.begin + i + 2)) << 8) + | |
460 (HexToInt(*(token.begin + i + 3)) << 4) + | |
461 HexToInt(*(token.begin + i + 4))); | |
462 i += 4; | |
463 break; | |
464 | |
465 default: | |
466 // We should only have valid strings at this point. If not, | |
467 // ParseStringToken didn't do it's job. | |
468 NOTREACHED(); | |
469 return NULL; | |
470 } | |
471 } else { | |
472 // Not escaped | |
473 decoded_str.push_back(c); | |
474 } | |
475 } | |
476 return Value::CreateStringValue(decoded_str); | |
477 } | |
478 | |
479 JSONReader::Token JSONReader::ParseToken() { | |
480 static const std::wstring kNullString(L"null"); | |
481 static const std::wstring kTrueString(L"true"); | |
482 static const std::wstring kFalseString(L"false"); | |
483 | |
484 EatWhitespaceAndComments(); | |
485 | |
486 Token token(Token::INVALID_TOKEN, 0, 0); | |
487 switch (*json_pos_) { | |
488 case '\0': | |
489 token.type = Token::END_OF_INPUT; | |
490 break; | |
491 | |
492 case 'n': | |
493 if (NextStringMatch(kNullString)) | |
494 token = Token(Token::NULL_TOKEN, json_pos_, 4); | |
495 break; | |
496 | |
497 case 't': | |
498 if (NextStringMatch(kTrueString)) | |
499 token = Token(Token::BOOL_TRUE, json_pos_, 4); | |
500 break; | |
501 | |
502 case 'f': | |
503 if (NextStringMatch(kFalseString)) | |
504 token = Token(Token::BOOL_FALSE, json_pos_, 5); | |
505 break; | |
506 | |
507 case '[': | |
508 token = Token(Token::ARRAY_BEGIN, json_pos_, 1); | |
509 break; | |
510 | |
511 case ']': | |
512 token = Token(Token::ARRAY_END, json_pos_, 1); | |
513 break; | |
514 | |
515 case ',': | |
516 token = Token(Token::LIST_SEPARATOR, json_pos_, 1); | |
517 break; | |
518 | |
519 case '{': | |
520 token = Token(Token::OBJECT_BEGIN, json_pos_, 1); | |
521 break; | |
522 | |
523 case '}': | |
524 token = Token(Token::OBJECT_END, json_pos_, 1); | |
525 break; | |
526 | |
527 case ':': | |
528 token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1); | |
529 break; | |
530 | |
531 case '0': | |
532 case '1': | |
533 case '2': | |
534 case '3': | |
535 case '4': | |
536 case '5': | |
537 case '6': | |
538 case '7': | |
539 case '8': | |
540 case '9': | |
541 case '-': | |
542 token = ParseNumberToken(); | |
543 break; | |
544 | |
545 case '"': | |
546 token = ParseStringToken(); | |
547 break; | |
548 } | |
549 return token; | |
550 } | |
551 | |
552 bool JSONReader::NextStringMatch(const std::wstring& str) { | |
553 for (size_t i = 0; i < str.length(); ++i) { | |
554 if ('\0' == *json_pos_) | |
555 return false; | |
556 if (*(json_pos_ + i) != str[i]) | |
557 return false; | |
558 } | |
559 return true; | |
560 } | |
561 | |
562 void JSONReader::EatWhitespaceAndComments() { | |
563 while ('\0' != *json_pos_) { | |
564 switch (*json_pos_) { | |
565 case ' ': | |
566 case '\n': | |
567 case '\r': | |
568 case '\t': | |
569 ++json_pos_; | |
570 break; | |
571 case '/': | |
572 // TODO(tc): This isn't in the RFC so it should be a parser flag. | |
573 if (!EatComment()) | |
574 return; | |
575 break; | |
576 default: | |
577 // Not a whitespace char, just exit. | |
578 return; | |
579 } | |
580 } | |
581 } | |
582 | |
583 bool JSONReader::EatComment() { | |
584 if ('/' != *json_pos_) | |
585 return false; | |
586 | |
587 wchar_t next_char = *(json_pos_ + 1); | |
588 if ('/' == next_char) { | |
589 // Line comment, read until \n or \r | |
590 json_pos_ += 2; | |
591 while ('\0' != *json_pos_) { | |
592 switch (*json_pos_) { | |
593 case '\n': | |
594 case '\r': | |
595 ++json_pos_; | |
596 return true; | |
597 default: | |
598 ++json_pos_; | |
599 } | |
600 } | |
601 } else if ('*' == next_char) { | |
602 // Block comment, read until */ | |
603 json_pos_ += 2; | |
604 while ('\0' != *json_pos_) { | |
605 if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) { | |
606 json_pos_ += 2; | |
607 return true; | |
608 } | |
609 ++json_pos_; | |
610 } | |
611 } else { | |
612 return false; | |
613 } | |
614 return true; | |
615 } | |
616 | |
617 void JSONReader::SetErrorMessage(const char* description, | |
618 const wchar_t* error_pos) { | |
619 int line_number = 1; | |
620 int column_number = 1; | |
621 | |
622 // Figure out the line and column the error occured at. | |
623 for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) { | |
624 if (*pos == '\0') { | |
625 NOTREACHED(); | |
626 return; | |
627 } | |
628 | |
629 if (*pos == '\n') { | |
630 ++line_number; | |
631 column_number = 1; | |
632 } else { | |
633 ++column_number; | |
634 } | |
635 } | |
636 | |
637 error_message_ = FormatErrorMessage(line_number, column_number, description); | |
638 } | |
OLD | NEW |