Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(68)

Side by Side Diff: base/json_reader.cc

Issue 316016: Move the json-related files into a separate json directory. This hopefully al... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/json_reader.h ('k') | base/json_reader_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json_reader.h"
6
7 #include "base/float_util.h"
8 #include "base/logging.h"
9 #include "base/scoped_ptr.h"
10 #include "base/string_util.h"
11 #include "base/utf_string_conversions.h"
12 #include "base/values.h"
13
14 static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,
15 0, 0);
16 static const int kStackLimit = 100;
17
18 namespace {
19
20 inline int HexToInt(wchar_t c) {
21 if ('0' <= c && c <= '9') {
22 return c - '0';
23 } else if ('A' <= c && c <= 'F') {
24 return c - 'A' + 10;
25 } else if ('a' <= c && c <= 'f') {
26 return c - 'a' + 10;
27 }
28 NOTREACHED();
29 return 0;
30 }
31
32 // A helper method for ParseNumberToken. It reads an int from the end of
33 // token. The method returns false if there is no valid integer at the end of
34 // the token.
35 bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {
36 wchar_t first = token.NextChar();
37 int len = 0;
38
39 // Read in more digits
40 wchar_t c = first;
41 while ('\0' != c && '0' <= c && c <= '9') {
42 ++token.length;
43 ++len;
44 c = token.NextChar();
45 }
46 // We need at least 1 digit.
47 if (len == 0)
48 return false;
49
50 if (!can_have_leading_zeros && len > 1 && '0' == first)
51 return false;
52
53 return true;
54 }
55
56 // A helper method for ParseStringToken. It reads |digits| hex digits from the
57 // token. If the sequence if digits is not valid (contains other characters),
58 // the method returns false.
59 bool ReadHexDigits(JSONReader::Token& token, int digits) {
60 for (int i = 1; i <= digits; ++i) {
61 wchar_t c = *(token.begin + token.length + i);
62 if ('\0' == c)
63 return false;
64 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
65 ('A' <= c && c <= 'F'))) {
66 return false;
67 }
68 }
69
70 token.length += digits;
71 return true;
72 }
73
74 } // anonymous namespace
75
76 const char* JSONReader::kBadRootElementType =
77 "Root value must be an array or object.";
78 const char* JSONReader::kInvalidEscape =
79 "Invalid escape sequence.";
80 const char* JSONReader::kSyntaxError =
81 "Syntax error.";
82 const char* JSONReader::kTrailingComma =
83 "Trailing comma not allowed.";
84 const char* JSONReader::kTooMuchNesting =
85 "Too much nesting.";
86 const char* JSONReader::kUnexpectedDataAfterRoot =
87 "Unexpected data after root element.";
88 const char* JSONReader::kUnsupportedEncoding =
89 "Unsupported encoding. JSON must be UTF-8.";
90 const char* JSONReader::kUnquotedDictionaryKey =
91 "Dictionary keys must be quoted.";
92
93 /* static */
94 Value* JSONReader::Read(const std::string& json,
95 bool allow_trailing_comma) {
96 return ReadAndReturnError(json, allow_trailing_comma, NULL);
97 }
98
99 /* static */
100 Value* JSONReader::ReadAndReturnError(const std::string& json,
101 bool allow_trailing_comma,
102 std::string *error_message_out) {
103 JSONReader reader = JSONReader();
104 Value* root = reader.JsonToValue(json, true, allow_trailing_comma);
105 if (root)
106 return root;
107
108 if (error_message_out)
109 *error_message_out = reader.error_message();
110
111 return NULL;
112 }
113
114 /* static */
115 std::string JSONReader::FormatErrorMessage(int line, int column,
116 const char* description) {
117 return StringPrintf("Line: %i, column: %i, %s",
118 line, column, description);
119 }
120
121 JSONReader::JSONReader()
122 : start_pos_(NULL), json_pos_(NULL), stack_depth_(0),
123 allow_trailing_comma_(false) {}
124
125 Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
126 bool allow_trailing_comma) {
127 // The input must be in UTF-8.
128 if (!IsStringUTF8(json.c_str())) {
129 error_message_ = kUnsupportedEncoding;
130 return NULL;
131 }
132
133 // The conversion from UTF8 to wstring removes null bytes for us
134 // (a good thing).
135 std::wstring json_wide(UTF8ToWide(json));
136 start_pos_ = json_wide.c_str();
137
138 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
139 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode
140 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from
141 // mis-treating a Unicode BOM as an invalid character and returning NULL,
142 // skip a converted Unicode BOM if it exists.
143 if (!json_wide.empty() && start_pos_[0] == 0xFEFF) {
144 ++start_pos_;
145 }
146
147 json_pos_ = start_pos_;
148 allow_trailing_comma_ = allow_trailing_comma;
149 stack_depth_ = 0;
150 error_message_.clear();
151
152 scoped_ptr<Value> root(BuildValue(check_root));
153 if (root.get()) {
154 if (ParseToken().type == Token::END_OF_INPUT) {
155 return root.release();
156 } else {
157 SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_);
158 }
159 }
160
161 // Default to calling errors "syntax errors".
162 if (error_message_.empty())
163 SetErrorMessage(kSyntaxError, json_pos_);
164
165 return NULL;
166 }
167
168 Value* JSONReader::BuildValue(bool is_root) {
169 ++stack_depth_;
170 if (stack_depth_ > kStackLimit) {
171 SetErrorMessage(kTooMuchNesting, json_pos_);
172 return NULL;
173 }
174
175 Token token = ParseToken();
176 // The root token must be an array or an object.
177 if (is_root && token.type != Token::OBJECT_BEGIN &&
178 token.type != Token::ARRAY_BEGIN) {
179 SetErrorMessage(kBadRootElementType, json_pos_);
180 return NULL;
181 }
182
183 scoped_ptr<Value> node;
184
185 switch (token.type) {
186 case Token::END_OF_INPUT:
187 case Token::INVALID_TOKEN:
188 return NULL;
189
190 case Token::NULL_TOKEN:
191 node.reset(Value::CreateNullValue());
192 break;
193
194 case Token::BOOL_TRUE:
195 node.reset(Value::CreateBooleanValue(true));
196 break;
197
198 case Token::BOOL_FALSE:
199 node.reset(Value::CreateBooleanValue(false));
200 break;
201
202 case Token::NUMBER:
203 node.reset(DecodeNumber(token));
204 if (!node.get())
205 return NULL;
206 break;
207
208 case Token::STRING:
209 node.reset(DecodeString(token));
210 if (!node.get())
211 return NULL;
212 break;
213
214 case Token::ARRAY_BEGIN:
215 {
216 json_pos_ += token.length;
217 token = ParseToken();
218
219 node.reset(new ListValue());
220 while (token.type != Token::ARRAY_END) {
221 Value* array_node = BuildValue(false);
222 if (!array_node)
223 return NULL;
224 static_cast<ListValue*>(node.get())->Append(array_node);
225
226 // After a list value, we expect a comma or the end of the list.
227 token = ParseToken();
228 if (token.type == Token::LIST_SEPARATOR) {
229 json_pos_ += token.length;
230 token = ParseToken();
231 // Trailing commas are invalid according to the JSON RFC, but some
232 // consumers need the parsing leniency, so handle accordingly.
233 if (token.type == Token::ARRAY_END) {
234 if (!allow_trailing_comma_) {
235 SetErrorMessage(kTrailingComma, json_pos_);
236 return NULL;
237 }
238 // Trailing comma OK, stop parsing the Array.
239 break;
240 }
241 } else if (token.type != Token::ARRAY_END) {
242 // Unexpected value after list value. Bail out.
243 return NULL;
244 }
245 }
246 if (token.type != Token::ARRAY_END) {
247 return NULL;
248 }
249 break;
250 }
251
252 case Token::OBJECT_BEGIN:
253 {
254 json_pos_ += token.length;
255 token = ParseToken();
256
257 node.reset(new DictionaryValue);
258 while (token.type != Token::OBJECT_END) {
259 if (token.type != Token::STRING) {
260 SetErrorMessage(kUnquotedDictionaryKey, json_pos_);
261 return NULL;
262 }
263 scoped_ptr<Value> dict_key_value(DecodeString(token));
264 if (!dict_key_value.get())
265 return NULL;
266
267 // Convert the key into a wstring.
268 std::wstring dict_key;
269 bool success = dict_key_value->GetAsString(&dict_key);
270 DCHECK(success);
271
272 json_pos_ += token.length;
273 token = ParseToken();
274 if (token.type != Token::OBJECT_PAIR_SEPARATOR)
275 return NULL;
276
277 json_pos_ += token.length;
278 token = ParseToken();
279 Value* dict_value = BuildValue(false);
280 if (!dict_value)
281 return NULL;
282 static_cast<DictionaryValue*>(node.get())->Set(dict_key, dict_value);
283
284 // After a key/value pair, we expect a comma or the end of the
285 // object.
286 token = ParseToken();
287 if (token.type == Token::LIST_SEPARATOR) {
288 json_pos_ += token.length;
289 token = ParseToken();
290 // Trailing commas are invalid according to the JSON RFC, but some
291 // consumers need the parsing leniency, so handle accordingly.
292 if (token.type == Token::OBJECT_END) {
293 if (!allow_trailing_comma_) {
294 SetErrorMessage(kTrailingComma, json_pos_);
295 return NULL;
296 }
297 // Trailing comma OK, stop parsing the Object.
298 break;
299 }
300 } else if (token.type != Token::OBJECT_END) {
301 // Unexpected value after last object value. Bail out.
302 return NULL;
303 }
304 }
305 if (token.type != Token::OBJECT_END)
306 return NULL;
307
308 break;
309 }
310
311 default:
312 // We got a token that's not a value.
313 return NULL;
314 }
315 json_pos_ += token.length;
316
317 --stack_depth_;
318 return node.release();
319 }
320
321 JSONReader::Token JSONReader::ParseNumberToken() {
322 // We just grab the number here. We validate the size in DecodeNumber.
323 // According to RFC4627, a valid number is: [minus] int [frac] [exp]
324 Token token(Token::NUMBER, json_pos_, 0);
325 wchar_t c = *json_pos_;
326 if ('-' == c) {
327 ++token.length;
328 c = token.NextChar();
329 }
330
331 if (!ReadInt(token, false))
332 return kInvalidToken;
333
334 // Optional fraction part
335 c = token.NextChar();
336 if ('.' == c) {
337 ++token.length;
338 if (!ReadInt(token, true))
339 return kInvalidToken;
340 c = token.NextChar();
341 }
342
343 // Optional exponent part
344 if ('e' == c || 'E' == c) {
345 ++token.length;
346 c = token.NextChar();
347 if ('-' == c || '+' == c) {
348 ++token.length;
349 c = token.NextChar();
350 }
351 if (!ReadInt(token, true))
352 return kInvalidToken;
353 }
354
355 return token;
356 }
357
358 Value* JSONReader::DecodeNumber(const Token& token) {
359 const std::wstring num_string(token.begin, token.length);
360
361 int num_int;
362 if (StringToInt(WideToUTF16Hack(num_string), &num_int))
363 return Value::CreateIntegerValue(num_int);
364
365 double num_double;
366 if (StringToDouble(WideToUTF16Hack(num_string), &num_double) &&
367 base::IsFinite(num_double))
368 return Value::CreateRealValue(num_double);
369
370 return NULL;
371 }
372
373 JSONReader::Token JSONReader::ParseStringToken() {
374 Token token(Token::STRING, json_pos_, 1);
375 wchar_t c = token.NextChar();
376 while ('\0' != c) {
377 if ('\\' == c) {
378 ++token.length;
379 c = token.NextChar();
380 // Make sure the escaped char is valid.
381 switch (c) {
382 case 'x':
383 if (!ReadHexDigits(token, 2)) {
384 SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
385 return kInvalidToken;
386 }
387 break;
388 case 'u':
389 if (!ReadHexDigits(token, 4)) {
390 SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
391 return kInvalidToken;
392 }
393 break;
394 case '\\':
395 case '/':
396 case 'b':
397 case 'f':
398 case 'n':
399 case 'r':
400 case 't':
401 case 'v':
402 case '"':
403 break;
404 default:
405 SetErrorMessage(kInvalidEscape, json_pos_ + token.length);
406 return kInvalidToken;
407 }
408 } else if ('"' == c) {
409 ++token.length;
410 return token;
411 }
412 ++token.length;
413 c = token.NextChar();
414 }
415 return kInvalidToken;
416 }
417
418 Value* JSONReader::DecodeString(const Token& token) {
419 std::wstring decoded_str;
420 decoded_str.reserve(token.length - 2);
421
422 for (int i = 1; i < token.length - 1; ++i) {
423 wchar_t c = *(token.begin + i);
424 if ('\\' == c) {
425 ++i;
426 c = *(token.begin + i);
427 switch (c) {
428 case '"':
429 case '/':
430 case '\\':
431 decoded_str.push_back(c);
432 break;
433 case 'b':
434 decoded_str.push_back('\b');
435 break;
436 case 'f':
437 decoded_str.push_back('\f');
438 break;
439 case 'n':
440 decoded_str.push_back('\n');
441 break;
442 case 'r':
443 decoded_str.push_back('\r');
444 break;
445 case 't':
446 decoded_str.push_back('\t');
447 break;
448 case 'v':
449 decoded_str.push_back('\v');
450 break;
451
452 case 'x':
453 decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) +
454 HexToInt(*(token.begin + i + 2)));
455 i += 2;
456 break;
457 case 'u':
458 decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) +
459 (HexToInt(*(token.begin + i + 2)) << 8) +
460 (HexToInt(*(token.begin + i + 3)) << 4) +
461 HexToInt(*(token.begin + i + 4)));
462 i += 4;
463 break;
464
465 default:
466 // We should only have valid strings at this point. If not,
467 // ParseStringToken didn't do it's job.
468 NOTREACHED();
469 return NULL;
470 }
471 } else {
472 // Not escaped
473 decoded_str.push_back(c);
474 }
475 }
476 return Value::CreateStringValue(decoded_str);
477 }
478
479 JSONReader::Token JSONReader::ParseToken() {
480 static const std::wstring kNullString(L"null");
481 static const std::wstring kTrueString(L"true");
482 static const std::wstring kFalseString(L"false");
483
484 EatWhitespaceAndComments();
485
486 Token token(Token::INVALID_TOKEN, 0, 0);
487 switch (*json_pos_) {
488 case '\0':
489 token.type = Token::END_OF_INPUT;
490 break;
491
492 case 'n':
493 if (NextStringMatch(kNullString))
494 token = Token(Token::NULL_TOKEN, json_pos_, 4);
495 break;
496
497 case 't':
498 if (NextStringMatch(kTrueString))
499 token = Token(Token::BOOL_TRUE, json_pos_, 4);
500 break;
501
502 case 'f':
503 if (NextStringMatch(kFalseString))
504 token = Token(Token::BOOL_FALSE, json_pos_, 5);
505 break;
506
507 case '[':
508 token = Token(Token::ARRAY_BEGIN, json_pos_, 1);
509 break;
510
511 case ']':
512 token = Token(Token::ARRAY_END, json_pos_, 1);
513 break;
514
515 case ',':
516 token = Token(Token::LIST_SEPARATOR, json_pos_, 1);
517 break;
518
519 case '{':
520 token = Token(Token::OBJECT_BEGIN, json_pos_, 1);
521 break;
522
523 case '}':
524 token = Token(Token::OBJECT_END, json_pos_, 1);
525 break;
526
527 case ':':
528 token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);
529 break;
530
531 case '0':
532 case '1':
533 case '2':
534 case '3':
535 case '4':
536 case '5':
537 case '6':
538 case '7':
539 case '8':
540 case '9':
541 case '-':
542 token = ParseNumberToken();
543 break;
544
545 case '"':
546 token = ParseStringToken();
547 break;
548 }
549 return token;
550 }
551
552 bool JSONReader::NextStringMatch(const std::wstring& str) {
553 for (size_t i = 0; i < str.length(); ++i) {
554 if ('\0' == *json_pos_)
555 return false;
556 if (*(json_pos_ + i) != str[i])
557 return false;
558 }
559 return true;
560 }
561
562 void JSONReader::EatWhitespaceAndComments() {
563 while ('\0' != *json_pos_) {
564 switch (*json_pos_) {
565 case ' ':
566 case '\n':
567 case '\r':
568 case '\t':
569 ++json_pos_;
570 break;
571 case '/':
572 // TODO(tc): This isn't in the RFC so it should be a parser flag.
573 if (!EatComment())
574 return;
575 break;
576 default:
577 // Not a whitespace char, just exit.
578 return;
579 }
580 }
581 }
582
583 bool JSONReader::EatComment() {
584 if ('/' != *json_pos_)
585 return false;
586
587 wchar_t next_char = *(json_pos_ + 1);
588 if ('/' == next_char) {
589 // Line comment, read until \n or \r
590 json_pos_ += 2;
591 while ('\0' != *json_pos_) {
592 switch (*json_pos_) {
593 case '\n':
594 case '\r':
595 ++json_pos_;
596 return true;
597 default:
598 ++json_pos_;
599 }
600 }
601 } else if ('*' == next_char) {
602 // Block comment, read until */
603 json_pos_ += 2;
604 while ('\0' != *json_pos_) {
605 if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
606 json_pos_ += 2;
607 return true;
608 }
609 ++json_pos_;
610 }
611 } else {
612 return false;
613 }
614 return true;
615 }
616
617 void JSONReader::SetErrorMessage(const char* description,
618 const wchar_t* error_pos) {
619 int line_number = 1;
620 int column_number = 1;
621
622 // Figure out the line and column the error occured at.
623 for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) {
624 if (*pos == '\0') {
625 NOTREACHED();
626 return;
627 }
628
629 if (*pos == '\n') {
630 ++line_number;
631 column_number = 1;
632 } else {
633 ++column_number;
634 }
635 }
636
637 error_message_ = FormatErrorMessage(line_number, column_number, description);
638 }
OLDNEW
« no previous file with comments | « base/json_reader.h ('k') | base/json_reader_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698