Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(259)

Side by Side Diff: base/json/json_parser.cc

Issue 1647803004: Move base to DEPS (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include <cmath>
8
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string_number_conversions.h"
12 #include "base/strings/string_piece.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversion_utils.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/third_party/icu/icu_utf.h"
18 #include "base/values.h"
19
20 namespace base {
21 namespace internal {
22
23 namespace {
24
// Nesting limit for dictionaries and lists. StackMarker::IsTooDeep() reports
// true once the tracked depth reaches this value.
const int kStackMaxDepth = 100;

// First code point outside the ASCII range. Code points below this value are
// appended to a string as a single char; anything at or above it is encoded
// as a multi-byte UTF-8 sequence.
const int32 kExtendedASCIIStart = 0x80;
28
29 // This and the class below are used to own the JSON input string for when
30 // string tokens are stored as StringPiece instead of std::string. This
31 // optimization avoids about 2/3rds of string memory copies. The constructor
32 // takes ownership of the input string. The real root value is Swap()ed into
33 // the new instance.
34 class DictionaryHiddenRootValue : public DictionaryValue {
35 public:
36 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
37 DCHECK(root->IsType(Value::TYPE_DICTIONARY));
38 DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
39 }
40
41 void Swap(DictionaryValue* other) override {
42 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
43
44 // First deep copy to convert JSONStringValue to std::string and swap that
45 // copy with |other|, which contains the new contents of |this|.
46 scoped_ptr<DictionaryValue> copy(DeepCopy());
47 copy->Swap(other);
48
49 // Then erase the contents of the current dictionary and swap in the
50 // new contents, originally from |other|.
51 Clear();
52 json_.reset();
53 DictionaryValue::Swap(copy.get());
54 }
55
56 // Not overriding DictionaryValue::Remove because it just calls through to
57 // the method below.
58
59 bool RemoveWithoutPathExpansion(const std::string& key,
60 scoped_ptr<Value>* out) override {
61 // If the caller won't take ownership of the removed value, just call up.
62 if (!out)
63 return DictionaryValue::RemoveWithoutPathExpansion(key, out);
64
65 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
66
67 // Otherwise, remove the value while its still "owned" by this and copy it
68 // to convert any JSONStringValues to std::string.
69 scoped_ptr<Value> out_owned;
70 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
71 return false;
72
73 out->reset(out_owned->DeepCopy());
74
75 return true;
76 }
77
78 private:
79 scoped_ptr<std::string> json_;
80
81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
82 };
83
84 class ListHiddenRootValue : public ListValue {
85 public:
86 ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
87 DCHECK(root->IsType(Value::TYPE_LIST));
88 ListValue::Swap(static_cast<ListValue*>(root));
89 }
90
91 void Swap(ListValue* other) override {
92 DVLOG(1) << "Swap()ing a ListValue inefficiently.";
93
94 // First deep copy to convert JSONStringValue to std::string and swap that
95 // copy with |other|, which contains the new contents of |this|.
96 scoped_ptr<ListValue> copy(DeepCopy());
97 copy->Swap(other);
98
99 // Then erase the contents of the current list and swap in the new contents,
100 // originally from |other|.
101 Clear();
102 json_.reset();
103 ListValue::Swap(copy.get());
104 }
105
106 bool Remove(size_t index, scoped_ptr<Value>* out) override {
107 // If the caller won't take ownership of the removed value, just call up.
108 if (!out)
109 return ListValue::Remove(index, out);
110
111 DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
112
113 // Otherwise, remove the value while its still "owned" by this and copy it
114 // to convert any JSONStringValues to std::string.
115 scoped_ptr<Value> out_owned;
116 if (!ListValue::Remove(index, &out_owned))
117 return false;
118
119 out->reset(out_owned->DeepCopy());
120
121 return true;
122 }
123
124 private:
125 scoped_ptr<std::string> json_;
126
127 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
128 };
129
130 // A variant on StringValue that uses StringPiece instead of copying the string
131 // into the Value. This can only be stored in a child of hidden root (above),
132 // otherwise the referenced string will not be guaranteed to outlive it.
133 class JSONStringValue : public Value {
134 public:
135 explicit JSONStringValue(const StringPiece& piece)
136 : Value(TYPE_STRING), string_piece_(piece) {}
137
138 // Overridden from Value:
139 bool GetAsString(std::string* out_value) const override {
140 string_piece_.CopyToString(out_value);
141 return true;
142 }
143 bool GetAsString(string16* out_value) const override {
144 *out_value = UTF8ToUTF16(string_piece_);
145 return true;
146 }
147 Value* DeepCopy() const override {
148 return new StringValue(string_piece_.as_string());
149 }
150 bool Equals(const Value* other) const override {
151 std::string other_string;
152 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
153 StringPiece(other_string) == string_piece_;
154 }
155
156 private:
157 // The location in the original input stream.
158 StringPiece string_piece_;
159
160 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
161 };
162
163 // Simple class that checks for maximum recursion/"stack overflow."
164 class StackMarker {
165 public:
166 explicit StackMarker(int* depth) : depth_(depth) {
167 ++(*depth_);
168 DCHECK_LE(*depth_, kStackMaxDepth);
169 }
170 ~StackMarker() {
171 --(*depth_);
172 }
173
174 bool IsTooDeep() const {
175 return *depth_ >= kStackMaxDepth;
176 }
177
178 private:
179 int* const depth_;
180
181 DISALLOW_COPY_AND_ASSIGN(StackMarker);
182 };
183
184 } // namespace
185
186 JSONParser::JSONParser(int options)
187 : options_(options),
188 start_pos_(NULL),
189 pos_(NULL),
190 end_pos_(NULL),
191 index_(0),
192 stack_depth_(0),
193 line_number_(0),
194 index_last_line_(0),
195 error_code_(JSONReader::JSON_NO_ERROR),
196 error_line_(0),
197 error_column_(0) {
198 }
199
200 JSONParser::~JSONParser() {
201 }
202
203 Value* JSONParser::Parse(const StringPiece& input) {
204 scoped_ptr<std::string> input_copy;
205 // If the children of a JSON root can be detached, then hidden roots cannot
206 // be used, so do not bother copying the input because StringPiece will not
207 // be used anywhere.
208 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
209 input_copy.reset(new std::string(input.as_string()));
210 start_pos_ = input_copy->data();
211 } else {
212 start_pos_ = input.data();
213 }
214 pos_ = start_pos_;
215 end_pos_ = start_pos_ + input.length();
216 index_ = 0;
217 line_number_ = 1;
218 index_last_line_ = 0;
219
220 error_code_ = JSONReader::JSON_NO_ERROR;
221 error_line_ = 0;
222 error_column_ = 0;
223
224 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
225 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
226 // ParseNextToken function mis-treating a Unicode BOM as an invalid
227 // character and returning NULL.
228 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
229 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
230 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
231 NextNChars(3);
232 }
233
234 // Parse the first and any nested tokens.
235 scoped_ptr<Value> root(ParseNextToken());
236 if (!root.get())
237 return NULL;
238
239 // Make sure the input stream is at an end.
240 if (GetNextToken() != T_END_OF_INPUT) {
241 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
242 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
243 return NULL;
244 }
245 }
246
247 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
248 // hidden root.
249 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
250 if (root->IsType(Value::TYPE_DICTIONARY)) {
251 return new DictionaryHiddenRootValue(input_copy.release(), root.get());
252 } else if (root->IsType(Value::TYPE_LIST)) {
253 return new ListHiddenRootValue(input_copy.release(), root.get());
254 } else if (root->IsType(Value::TYPE_STRING)) {
255 // A string type could be a JSONStringValue, but because there's no
256 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
257 // preserve it.
258 return root->DeepCopy();
259 }
260 }
261
262 // All other values can be returned directly.
263 return root.release();
264 }
265
266 JSONReader::JsonParseError JSONParser::error_code() const {
267 return error_code_;
268 }
269
270 std::string JSONParser::GetErrorMessage() const {
271 return FormatErrorMessage(error_line_, error_column_,
272 JSONReader::ErrorCodeToString(error_code_));
273 }
274
275 // StringBuilder ///////////////////////////////////////////////////////////////
276
277 JSONParser::StringBuilder::StringBuilder()
278 : pos_(NULL),
279 length_(0),
280 string_(NULL) {
281 }
282
283 JSONParser::StringBuilder::StringBuilder(const char* pos)
284 : pos_(pos),
285 length_(0),
286 string_(NULL) {
287 }
288
289 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
290 std::swap(other->string_, string_);
291 std::swap(other->pos_, pos_);
292 std::swap(other->length_, length_);
293 }
294
295 JSONParser::StringBuilder::~StringBuilder() {
296 delete string_;
297 }
298
299 void JSONParser::StringBuilder::Append(const char& c) {
300 DCHECK_GE(c, 0);
301 DCHECK_LT(c, 128);
302
303 if (string_)
304 string_->push_back(c);
305 else
306 ++length_;
307 }
308
309 void JSONParser::StringBuilder::AppendString(const std::string& str) {
310 DCHECK(string_);
311 string_->append(str);
312 }
313
314 void JSONParser::StringBuilder::Convert() {
315 if (string_)
316 return;
317 string_ = new std::string(pos_, length_);
318 }
319
320 bool JSONParser::StringBuilder::CanBeStringPiece() const {
321 return !string_;
322 }
323
324 StringPiece JSONParser::StringBuilder::AsStringPiece() {
325 if (string_)
326 return StringPiece();
327 return StringPiece(pos_, length_);
328 }
329
330 const std::string& JSONParser::StringBuilder::AsString() {
331 if (!string_)
332 Convert();
333 return *string_;
334 }
335
336 // JSONParser private //////////////////////////////////////////////////////////
337
338 inline bool JSONParser::CanConsume(int length) {
339 return pos_ + length <= end_pos_;
340 }
341
342 const char* JSONParser::NextChar() {
343 DCHECK(CanConsume(1));
344 ++index_;
345 ++pos_;
346 return pos_;
347 }
348
349 void JSONParser::NextNChars(int n) {
350 DCHECK(CanConsume(n));
351 index_ += n;
352 pos_ += n;
353 }
354
355 JSONParser::Token JSONParser::GetNextToken() {
356 EatWhitespaceAndComments();
357 if (!CanConsume(1))
358 return T_END_OF_INPUT;
359
360 switch (*pos_) {
361 case '{':
362 return T_OBJECT_BEGIN;
363 case '}':
364 return T_OBJECT_END;
365 case '[':
366 return T_ARRAY_BEGIN;
367 case ']':
368 return T_ARRAY_END;
369 case '"':
370 return T_STRING;
371 case '0':
372 case '1':
373 case '2':
374 case '3':
375 case '4':
376 case '5':
377 case '6':
378 case '7':
379 case '8':
380 case '9':
381 case '-':
382 return T_NUMBER;
383 case 't':
384 return T_BOOL_TRUE;
385 case 'f':
386 return T_BOOL_FALSE;
387 case 'n':
388 return T_NULL;
389 case ',':
390 return T_LIST_SEPARATOR;
391 case ':':
392 return T_OBJECT_PAIR_SEPARATOR;
393 default:
394 return T_INVALID_TOKEN;
395 }
396 }
397
398 void JSONParser::EatWhitespaceAndComments() {
399 while (pos_ < end_pos_) {
400 switch (*pos_) {
401 case '\r':
402 case '\n':
403 index_last_line_ = index_;
404 // Don't increment line_number_ twice for "\r\n".
405 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
406 ++line_number_;
407 // Fall through.
408 case ' ':
409 case '\t':
410 NextChar();
411 break;
412 case '/':
413 if (!EatComment())
414 return;
415 break;
416 default:
417 return;
418 }
419 }
420 }
421
422 bool JSONParser::EatComment() {
423 if (*pos_ != '/' || !CanConsume(1))
424 return false;
425
426 char next_char = *NextChar();
427 if (next_char == '/') {
428 // Single line comment, read to newline.
429 while (CanConsume(1)) {
430 next_char = *NextChar();
431 if (next_char == '\n' || next_char == '\r')
432 return true;
433 }
434 } else if (next_char == '*') {
435 char previous_char = '\0';
436 // Block comment, read until end marker.
437 while (CanConsume(1)) {
438 next_char = *NextChar();
439 if (previous_char == '*' && next_char == '/') {
440 // EatWhitespaceAndComments will inspect pos_, which will still be on
441 // the last / of the comment, so advance once more (which may also be
442 // end of input).
443 NextChar();
444 return true;
445 }
446 previous_char = next_char;
447 }
448
449 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
450 }
451
452 return false;
453 }
454
455 Value* JSONParser::ParseNextToken() {
456 return ParseToken(GetNextToken());
457 }
458
459 Value* JSONParser::ParseToken(Token token) {
460 switch (token) {
461 case T_OBJECT_BEGIN:
462 return ConsumeDictionary();
463 case T_ARRAY_BEGIN:
464 return ConsumeList();
465 case T_STRING:
466 return ConsumeString();
467 case T_NUMBER:
468 return ConsumeNumber();
469 case T_BOOL_TRUE:
470 case T_BOOL_FALSE:
471 case T_NULL:
472 return ConsumeLiteral();
473 default:
474 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
475 return NULL;
476 }
477 }
478
479 Value* JSONParser::ConsumeDictionary() {
480 if (*pos_ != '{') {
481 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
482 return NULL;
483 }
484
485 StackMarker depth_check(&stack_depth_);
486 if (depth_check.IsTooDeep()) {
487 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
488 return NULL;
489 }
490
491 scoped_ptr<DictionaryValue> dict(new DictionaryValue);
492
493 NextChar();
494 Token token = GetNextToken();
495 while (token != T_OBJECT_END) {
496 if (token != T_STRING) {
497 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
498 return NULL;
499 }
500
501 // First consume the key.
502 StringBuilder key;
503 if (!ConsumeStringRaw(&key)) {
504 return NULL;
505 }
506
507 // Read the separator.
508 NextChar();
509 token = GetNextToken();
510 if (token != T_OBJECT_PAIR_SEPARATOR) {
511 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
512 return NULL;
513 }
514
515 // The next token is the value. Ownership transfers to |dict|.
516 NextChar();
517 Value* value = ParseNextToken();
518 if (!value) {
519 // ReportError from deeper level.
520 return NULL;
521 }
522
523 dict->SetWithoutPathExpansion(key.AsString(), value);
524
525 NextChar();
526 token = GetNextToken();
527 if (token == T_LIST_SEPARATOR) {
528 NextChar();
529 token = GetNextToken();
530 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
531 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
532 return NULL;
533 }
534 } else if (token != T_OBJECT_END) {
535 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
536 return NULL;
537 }
538 }
539
540 return dict.release();
541 }
542
543 Value* JSONParser::ConsumeList() {
544 if (*pos_ != '[') {
545 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
546 return NULL;
547 }
548
549 StackMarker depth_check(&stack_depth_);
550 if (depth_check.IsTooDeep()) {
551 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
552 return NULL;
553 }
554
555 scoped_ptr<ListValue> list(new ListValue);
556
557 NextChar();
558 Token token = GetNextToken();
559 while (token != T_ARRAY_END) {
560 Value* item = ParseToken(token);
561 if (!item) {
562 // ReportError from deeper level.
563 return NULL;
564 }
565
566 list->Append(item);
567
568 NextChar();
569 token = GetNextToken();
570 if (token == T_LIST_SEPARATOR) {
571 NextChar();
572 token = GetNextToken();
573 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
574 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
575 return NULL;
576 }
577 } else if (token != T_ARRAY_END) {
578 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
579 return NULL;
580 }
581 }
582
583 return list.release();
584 }
585
586 Value* JSONParser::ConsumeString() {
587 StringBuilder string;
588 if (!ConsumeStringRaw(&string))
589 return NULL;
590
591 // Create the Value representation, using a hidden root, if configured
592 // to do so, and if the string can be represented by StringPiece.
593 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
594 return new JSONStringValue(string.AsStringPiece());
595 } else {
596 if (string.CanBeStringPiece())
597 string.Convert();
598 return new StringValue(string.AsString());
599 }
600 }
601
602 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
603 if (*pos_ != '"') {
604 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
605 return false;
606 }
607
608 // StringBuilder will internally build a StringPiece unless a UTF-16
609 // conversion occurs, at which point it will perform a copy into a
610 // std::string.
611 StringBuilder string(NextChar());
612
613 int length = end_pos_ - start_pos_;
614 int32 next_char = 0;
615
616 while (CanConsume(1)) {
617 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
618 CBU8_NEXT(start_pos_, index_, length, next_char);
619 if (next_char < 0 || !IsValidCharacter(next_char)) {
620 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
621 return false;
622 }
623
624 // If this character is an escape sequence...
625 if (next_char == '\\') {
626 // The input string will be adjusted (either by combining the two
627 // characters of an encoded escape sequence, or with a UTF conversion),
628 // so using StringPiece isn't possible -- force a conversion.
629 string.Convert();
630
631 if (!CanConsume(1)) {
632 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
633 return false;
634 }
635
636 switch (*NextChar()) {
637 // Allowed esape sequences:
638 case 'x': { // UTF-8 sequence.
639 // UTF-8 \x escape sequences are not allowed in the spec, but they
640 // are supported here for backwards-compatiblity with the old parser.
641 if (!CanConsume(2)) {
642 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
643 return false;
644 }
645
646 int hex_digit = 0;
647 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
648 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
649 return false;
650 }
651 NextChar();
652
653 if (hex_digit < kExtendedASCIIStart)
654 string.Append(static_cast<char>(hex_digit));
655 else
656 DecodeUTF8(hex_digit, &string);
657 break;
658 }
659 case 'u': { // UTF-16 sequence.
660 // UTF units are of the form \uXXXX.
661 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
662 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
663 return false;
664 }
665
666 // Skip the 'u'.
667 NextChar();
668
669 std::string utf8_units;
670 if (!DecodeUTF16(&utf8_units)) {
671 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
672 return false;
673 }
674
675 string.AppendString(utf8_units);
676 break;
677 }
678 case '"':
679 string.Append('"');
680 break;
681 case '\\':
682 string.Append('\\');
683 break;
684 case '/':
685 string.Append('/');
686 break;
687 case 'b':
688 string.Append('\b');
689 break;
690 case 'f':
691 string.Append('\f');
692 break;
693 case 'n':
694 string.Append('\n');
695 break;
696 case 'r':
697 string.Append('\r');
698 break;
699 case 't':
700 string.Append('\t');
701 break;
702 case 'v': // Not listed as valid escape sequence in the RFC.
703 string.Append('\v');
704 break;
705 // All other escape squences are illegal.
706 default:
707 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
708 return false;
709 }
710 } else if (next_char == '"') {
711 --index_; // Rewind by one because of CBU8_NEXT.
712 out->Swap(&string);
713 return true;
714 } else {
715 if (next_char < kExtendedASCIIStart)
716 string.Append(static_cast<char>(next_char));
717 else
718 DecodeUTF8(next_char, &string);
719 }
720 }
721
722 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
723 return false;
724 }
725
726 // Entry is at the first X in \uXXXX.
727 bool JSONParser::DecodeUTF16(std::string* dest_string) {
728 if (!CanConsume(4))
729 return false;
730
731 // This is a 32-bit field because the shift operations in the
732 // conversion process below cause MSVC to error about "data loss."
733 // This only stores UTF-16 code units, though.
734 // Consume the UTF-16 code unit, which may be a high surrogate.
735 int code_unit16_high = 0;
736 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
737 return false;
738
739 // Only add 3, not 4, because at the end of this iteration, the parser has
740 // finished working with the last digit of the UTF sequence, meaning that
741 // the next iteration will advance to the next byte.
742 NextNChars(3);
743
744 // Used to convert the UTF-16 code units to a code point and then to a UTF-8
745 // code unit sequence.
746 char code_unit8[8] = { 0 };
747 size_t offset = 0;
748
749 // If this is a high surrogate, consume the next code unit to get the
750 // low surrogate.
751 if (CBU16_IS_SURROGATE(code_unit16_high)) {
752 // Make sure this is the high surrogate. If not, it's an encoding
753 // error.
754 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
755 return false;
756
757 // Make sure that the token has more characters to consume the
758 // lower surrogate.
759 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
760 return false;
761 if (*NextChar() != '\\' || *NextChar() != 'u')
762 return false;
763
764 NextChar(); // Read past 'u'.
765 int code_unit16_low = 0;
766 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
767 return false;
768
769 NextNChars(3);
770
771 if (!CBU16_IS_TRAIL(code_unit16_low)) {
772 return false;
773 }
774
775 uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
776 code_unit16_low);
777 if (!IsValidCharacter(code_point))
778 return false;
779
780 offset = 0;
781 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
782 } else {
783 // Not a surrogate.
784 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
785 if (!IsValidCharacter(code_unit16_high))
786 return false;
787
788 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
789 }
790
791 dest_string->append(code_unit8);
792 return true;
793 }
794
795 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
796 DCHECK(IsValidCharacter(point));
797
798 // Anything outside of the basic ASCII plane will need to be decoded from
799 // int32 to a multi-byte sequence.
800 if (point < kExtendedASCIIStart) {
801 dest->Append(static_cast<char>(point));
802 } else {
803 char utf8_units[4] = { 0 };
804 int offset = 0;
805 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
806 dest->Convert();
807 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
808 // zero terminated at this point. |offset| contains the correct length.
809 dest->AppendString(std::string(utf8_units, offset));
810 }
811 }
812
813 Value* JSONParser::ConsumeNumber() {
814 const char* num_start = pos_;
815 const int start_index = index_;
816 int end_index = start_index;
817
818 if (*pos_ == '-')
819 NextChar();
820
821 if (!ReadInt(false)) {
822 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
823 return NULL;
824 }
825 end_index = index_;
826
827 // The optional fraction part.
828 if (*pos_ == '.') {
829 if (!CanConsume(1)) {
830 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
831 return NULL;
832 }
833 NextChar();
834 if (!ReadInt(true)) {
835 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
836 return NULL;
837 }
838 end_index = index_;
839 }
840
841 // Optional exponent part.
842 if (*pos_ == 'e' || *pos_ == 'E') {
843 NextChar();
844 if (*pos_ == '-' || *pos_ == '+')
845 NextChar();
846 if (!ReadInt(true)) {
847 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
848 return NULL;
849 }
850 end_index = index_;
851 }
852
853 // ReadInt is greedy because numbers have no easily detectable sentinel,
854 // so save off where the parser should be on exit (see Consume invariant at
855 // the top of the header), then make sure the next token is one which is
856 // valid.
857 const char* exit_pos = pos_ - 1;
858 int exit_index = index_ - 1;
859
860 switch (GetNextToken()) {
861 case T_OBJECT_END:
862 case T_ARRAY_END:
863 case T_LIST_SEPARATOR:
864 case T_END_OF_INPUT:
865 break;
866 default:
867 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
868 return NULL;
869 }
870
871 pos_ = exit_pos;
872 index_ = exit_index;
873
874 StringPiece num_string(num_start, end_index - start_index);
875
876 int num_int;
877 if (StringToInt(num_string, &num_int))
878 return new FundamentalValue(num_int);
879
880 double num_double;
881 if (StringToDouble(num_string.as_string(), &num_double) &&
882 std::isfinite(num_double)) {
883 return new FundamentalValue(num_double);
884 }
885
886 return NULL;
887 }
888
889 bool JSONParser::ReadInt(bool allow_leading_zeros) {
890 char first = *pos_;
891 int len = 0;
892
893 char c = first;
894 while (CanConsume(1) && IsAsciiDigit(c)) {
895 c = *NextChar();
896 ++len;
897 }
898
899 if (len == 0)
900 return false;
901
902 if (!allow_leading_zeros && len > 1 && first == '0')
903 return false;
904
905 return true;
906 }
907
908 Value* JSONParser::ConsumeLiteral() {
909 switch (*pos_) {
910 case 't': {
911 const char kTrueLiteral[] = "true";
912 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
913 if (!CanConsume(kTrueLen - 1) ||
914 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
915 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
916 return NULL;
917 }
918 NextNChars(kTrueLen - 1);
919 return new FundamentalValue(true);
920 }
921 case 'f': {
922 const char kFalseLiteral[] = "false";
923 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
924 if (!CanConsume(kFalseLen - 1) ||
925 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
926 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
927 return NULL;
928 }
929 NextNChars(kFalseLen - 1);
930 return new FundamentalValue(false);
931 }
932 case 'n': {
933 const char kNullLiteral[] = "null";
934 const int kNullLen = static_cast<int>(strlen(kNullLiteral));
935 if (!CanConsume(kNullLen - 1) ||
936 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
937 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
938 return NULL;
939 }
940 NextNChars(kNullLen - 1);
941 return Value::CreateNullValue().release();
942 }
943 default:
944 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
945 return NULL;
946 }
947 }
948
949 // static
950 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
951 return strncmp(one, two, len) == 0;
952 }
953
954 void JSONParser::ReportError(JSONReader::JsonParseError code,
955 int column_adjust) {
956 error_code_ = code;
957 error_line_ = line_number_;
958 error_column_ = index_ - index_last_line_ + column_adjust;
959 }
960
961 // static
962 std::string JSONParser::FormatErrorMessage(int line, int column,
963 const std::string& description) {
964 if (line || column) {
965 return StringPrintf("Line: %i, column: %i, %s",
966 line, column, description.c_str());
967 }
968 return description;
969 }
970
971 } // namespace internal
972 } // namespace base
OLDNEW
« no previous file with comments | « base/json/json_parser.h ('k') | base/json/json_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698