Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: base/json/json_parser.cc

Issue 10035042: Rewrite base::JSONReader to be 35-40% faster, depending on the input string. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address comments/fix Win Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include "base/float_util.h"
8 #include "base/logging.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_number_conversions.h"
11 #include "base/string_util.h"
12 #include "base/stringprintf.h"
13 #include "base/third_party/icu/icu_utf.h"
14 #include "base/utf_string_conversion_utils.h"
15 #include "base/utf_string_conversions.h"
16 #include "base/values.h"
17
18 namespace base {
19 namespace internal {
20
21 namespace {
22
23 const int kStackMaxDepth = 100;
24
25 const int32 kExtendedASCIIStart = 0x80;
26
27 // This and the class below are used to own the JSON input string for when
28 // string tokens are stored as StringPiece instead of std::string. This
29 // optimization avoids about 2/3rds of string memory copies. The constructor
30 // takes the input string and swaps its data into the new instance. The real
31 // root value is also Swap()ed into the new instance.
32 class DictionaryHiddenRootValue : public base::DictionaryValue {
33 public:
34 DictionaryHiddenRootValue(std::string* json, Value* root) {
35 DCHECK(root->IsType(Value::TYPE_DICTIONARY));
36 DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
37 json->swap(json_);
38 }
39
40 virtual void Swap(DictionaryValue* other) OVERRIDE {
41 DLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
Mark Mentovai 2012/05/08 20:19:41 DLOG(1) is DLOG(WARNING). You either meant that or
Robert Sesek 2012/05/15 16:57:51 I've said it before, but it bears repeating: we ha
42
43 // First deep copy to convert JSONStringValue to std::string and swap that
44 // copy with |other|, which contains the new contents of |this|.
45 scoped_ptr<base::DictionaryValue> copy(DeepCopy());
46 copy->Swap(other);
47
48 // Then erase the contents of the current dictionary and swap in the
49 // new contents, originally from |other|.
50 Clear();
51 json_.clear();
52 DictionaryValue::Swap(copy.get());
53 }
54
55 // Not overriding DictionaryValue::Remove because it just calls through to
56 // the method below.
57
58 virtual bool RemoveWithoutPathExpansion(const std::string& key,
59 Value** out) OVERRIDE {
60 // If the caller won't take ownership of the removed value, just call up.
61 if (!out)
62 return DictionaryValue::RemoveWithoutPathExpansion(key, out);
63
64 DLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
65
66 // Otherwise, remove the value while its still "owned" by this and copy it
67 // to convert any JSONStringValues to std::string.
68 Value* out_owned = NULL;
69 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
70 return false;
71
72 *out = out_owned->DeepCopy();
73 delete out_owned;
74
75 return true;
76 }
77
78 private:
79 std::string json_;
80
81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
82 };
83
84 class ListHiddenRootValue : public base::ListValue {
85 public:
86 ListHiddenRootValue(std::string* json, Value* root) {
87 DCHECK(root->IsType(Value::TYPE_LIST));
88 ListValue::Swap(static_cast<ListValue*>(root));
89 json->swap(json_);
90 }
91
92 virtual void Swap(ListValue* other) OVERRIDE {
93 DLOG(1) << "Swap()ing a ListValue inefficiently.";
94
95 // First deep copy to convert JSONStringValue to std::string and swap that
96 // copy with |other|, which contains the new contents of |this|.
97 scoped_ptr<base::ListValue> copy(DeepCopy());
98 copy->Swap(other);
99
100 // Then erase the contents of the current list and swap in the new contents,
101 // originally from |other|.
102 Clear();
103 json_.clear();
104 ListValue::Swap(copy.get());
105 }
106
107 virtual bool Remove(size_t index, Value** out) OVERRIDE {
108 // If the caller won't take ownership of the removed value, just call up.
109 if (!out)
110 return ListValue::Remove(index, out);
111
112 DLOG(1) << "Remove()ing from a ListValue inefficiently.";
113
114 // Otherwise, remove the value while its still "owned" by this and copy it
115 // to convert any JSONStringValues to std::string.
116 Value* out_owned = NULL;
117 if (!ListValue::Remove(index, &out_owned))
118 return false;
119
120 *out = out_owned->DeepCopy();
121 delete out_owned;
122
123 return true;
124 }
125
126 private:
127 std::string json_;
128
129 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
130 };
131
132 // A variant on StringValue that uses StringPiece instead of copying the string
133 // into the Value. This can only be stored in a child of hidden root (above),
134 // otherwise the referenced string will not be guaranteed to outlive it.
135 class JSONStringValue : public base::Value {
136 public:
137 explicit JSONStringValue(const base::StringPiece& piece)
138 : Value(TYPE_STRING),
139 string_piece_(piece) {
140 }
141
142 // Value:
143 bool GetAsString(std::string* out_value) const OVERRIDE {
144 string_piece_.CopyToString(out_value);
145 return true;
146 }
147 bool GetAsString(string16* out_value) const OVERRIDE {
148 *out_value = UTF8ToUTF16(string_piece_);
149 return true;
150 }
151 virtual Value* DeepCopy() const OVERRIDE {
152 return Value::CreateStringValue(string_piece_.as_string());
153 }
154 virtual bool Equals(const Value* other) const OVERRIDE {
155 std::string other_string;
156 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
157 StringPiece(other_string) == string_piece_;
158 }
159
160 private:
161 // The location in the original input stream.
162 base::StringPiece string_piece_;
163
164 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
165 };
166
167 // Simple class that checks for maximum recursion/"stack overflow."
168 class StackMarker {
169 public:
170 explicit StackMarker(int* depth) : depth_(depth) {
171 ++(*depth_);
172 }
Mark Mentovai 2012/05/08 20:19:41 You should (D)CHECK here that depth <= kStackMaxDe
Robert Sesek 2012/05/15 16:57:51 Done.
173 ~StackMarker() {
174 --(*depth_);
175 }
176
177 bool IsTooDeep() const {
178 return *depth_ >= kStackMaxDepth;
179 }
180
181 private:
182 int* const depth_;
183
184 DISALLOW_COPY_AND_ASSIGN(StackMarker);
185 };
186
187 } // namespace
188
189 JSONParser::JSONParser(int options)
190 : options_(options),
191 start_pos_(NULL),
192 pos_(0),
tfarina 2012/05/04 00:25:28 nit: just curious why did you choose 0 to initiali
Robert Sesek 2012/05/15 16:57:51 Done.
193 end_pos_(0),
194 index_(0),
195 stack_depth_(0),
196 line_number_(0),
197 index_last_line_(0),
198 error_code_(JSONReader::JSON_NO_ERROR),
199 error_line_(0),
200 error_column_(0) {
201 }
202
203 JSONParser::~JSONParser() {
204 }
205
206 Value* JSONParser::Parse(const std::string& input) {
207 // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix
208 // <http://crbug.com/126107> when my Windows box arrives.
209 #if defined(OS_WIN)
210 options_ |= JSON_DETACHABLE_CHILDREN;
211 #endif
212
213 std::string input_copy;
214 // If the children of a JSON root can be detached, then hidden roots cannot
215 // be used, so do not bother copying the input because StringPiece will not
216 // be used anywhere.
217 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
218 input_copy = input;
219 start_pos_ = input_copy.data();
220 } else {
221 start_pos_ = input.data();
222 }
223 pos_ = start_pos_;
224 end_pos_ = start_pos_ + input.length();
225 index_ = 0;
226 line_number_ = 1;
227 index_last_line_ = 0;
228
229 error_code_ = JSONReader::JSON_NO_ERROR;
230 error_line_ = 0;
231 error_column_ = 0;
232
233 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
234 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
235 // ParseNextToken function mis-treating a Unicode BOM as an invalid
236 // character and returning NULL.
237 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
238 static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
239 static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
240 NextNChars(3);
241 }
242
243 // Parse the first and all subsequent tokens.
244 scoped_ptr<Value> root(ParseNextToken());
245 if (!root.get())
246 return NULL;
247
248 // Make sure the input stream is at an end.
249 if (GetNextToken() != T_END_OF_INPUT) {
250 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
251 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
252 return NULL;
253 }
254 }
255
256 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
257 // hidden root.
258 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
259 if (root->IsType(Value::TYPE_DICTIONARY)) {
260 return new DictionaryHiddenRootValue(&input_copy, root.release());
261 } else if (root->IsType(Value::TYPE_LIST)) {
262 return new ListHiddenRootValue(&input_copy, root.release());
263 } else if (root->IsType(Value::TYPE_STRING)) {
264 // A string type could be a JSONStringValue, but because there's no
265 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
266 // preserve it.
267 return root->DeepCopy();
268 }
269 }
270
271 // All other values can be returned directly.
272 return root.release();
273 }
274
275 JSONReader::JsonParseError JSONParser::error_code() const {
276 return error_code_;
277 }
278
279 std::string JSONParser::GetErrorMessage() const {
280 return FormatErrorMessage(error_line_, error_column_,
281 JSONReader::ErrorCodeToString(error_code_));
282 }
283
284 // StringBuilder ///////////////////////////////////////////////////////////////
285
286 JSONParser::StringBuilder::StringBuilder()
287 : pos_(NULL),
288 length_(0),
289 string_(NULL) {
290 }
291
292 JSONParser::StringBuilder::StringBuilder(const char* pos)
293 : pos_(pos),
294 length_(0),
295 string_(NULL) {
296 }
297
298 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
299 std::swap(other->string_, string_);
300 std::swap(other->pos_, pos_);
301 std::swap(other->length_, length_);
302 }
303
304 JSONParser::StringBuilder::~StringBuilder() {
305 delete string_;
306 }
307
308 void JSONParser::StringBuilder::Append(const char& c) {
309 DCHECK_GE(c, 0);
Mark Mentovai 2012/05/08 20:19:41 Because of the stupid nature of char, you should b
Robert Sesek 2012/05/15 16:57:51 How would you do this?
310 DCHECK_LT(c, 128);
311
312 if (string_)
313 string_->push_back(c);
314 else
315 ++length_;
316 }
317
318 void JSONParser::StringBuilder::AppendString(const std::string& str) {
319 DCHECK(string_);
320 string_->append(str);
321 }
322
323 void JSONParser::StringBuilder::Convert() {
324 if (string_)
325 return;
326 string_ = new std::string(pos_, length_);
327 }
328
329 bool JSONParser::StringBuilder::CanBeStringPiece() const {
330 return !string_;
331 }
332
333 StringPiece JSONParser::StringBuilder::AsStringPiece() {
334 if (string_)
335 return StringPiece();
336 return StringPiece(pos_, length_);
337 }
338
339 const std::string& JSONParser::StringBuilder::AsString() {
340 if (!string_)
341 Convert();
342 return *string_;
343 }
344
345 // JSONParser private //////////////////////////////////////////////////////////
346
347 inline bool JSONParser::CanConsume(int length) {
348 return pos_ + length <= end_pos_;
349 }
350
351 const char* JSONParser::NextChar() {
352 DCHECK(CanConsume(1));
353 ++index_;
354 ++pos_;
355 return pos_;
356 }
357
358 void JSONParser::NextNChars(int n) {
359 DCHECK(CanConsume(n));
360 index_ += n;
361 pos_ += n;
362 }
363
364 JSONParser::Token JSONParser::GetNextToken() {
365 EatWhitespaceAndComments();
366 if (!CanConsume(1))
367 return T_END_OF_INPUT;
368
369 switch (*pos_) {
370 case '{':
371 return T_OBJECT_BEGIN;
372 case '}':
373 return T_OBJECT_END;
374 case '[':
375 return T_ARRAY_BEGIN;
376 case ']':
377 return T_ARRAY_END;
378 case '"':
379 return T_STRING;
380 case '0':
381 case '1':
382 case '2':
383 case '3':
384 case '4':
385 case '5':
386 case '6':
387 case '7':
388 case '8':
389 case '9':
390 case '-':
391 return T_NUMBER;
392 case 't':
393 return T_BOOL_TRUE;
394 case 'f':
395 return T_BOOL_FALSE;
396 case 'n':
397 return T_NULL;
398 case ',':
399 return T_LIST_SEPARATOR;
400 case ':':
401 return T_OBJECT_PAIR_SEPARATOR;
402 default:
403 return T_INVALID_TOKEN;
404 }
405 }
406
407 void JSONParser::EatWhitespaceAndComments() {
408 while (pos_ < end_pos_) {
409 switch (*pos_) {
410 case '\r':
411 case '\n':
412 index_last_line_ = index_;
413 ++line_number_;
414 // Fall through.
415 case ' ':
416 case '\t':
417 NextChar();
418 break;
419 case '/':
420 if (!EatComment())
421 return;
422 break;
423 default:
424 return;
425 }
426 }
427 }
428
429 bool JSONParser::EatComment() {
430 if (*pos_ != '/' || !CanConsume(1))
431 return false;
432
433 char next_char = *NextChar();
434 if (next_char == '/') {
435 // Single line comment, read to newline.
436 while (CanConsume(1)) {
437 char next_char = *NextChar();
438 if (next_char == '\n' || next_char == '\r')
439 return true;
440 }
441 } else if (next_char == '*') {
442 // Block comment, read until end marker.
443 while (CanConsume(2)) {
444 if (*NextChar() == '*' && *NextChar() == '/') {
Mark Mentovai 2012/05/08 20:19:41 This eats two characters at a time in a loop, so t
Robert Sesek 2012/05/15 16:57:51 Isn't that what's happening? The operator there is
445 // EatWhitespaceAndComments will inspect pos_, which will still be on
446 // the last / of the comment, so advance once more (which may also be
447 // end of input).
448 NextChar();
449 return true;
450 }
451 }
Mark Mentovai 2012/05/08 20:19:41 If the /* is unterminated and you reach the end of
Robert Sesek 2012/05/15 16:57:51 Done.
452 }
453
454 return false;
455 }
456
457 Value* JSONParser::ParseNextToken() {
458 return ParseToken(GetNextToken());
459 }
460
461 Value* JSONParser::ParseToken(Token token) {
462 switch (token) {
463 case T_OBJECT_BEGIN:
464 return ConsumeDictionary();
465 case T_ARRAY_BEGIN:
466 return ConsumeList();
467 case T_STRING:
468 return ConsumeString();
469 case T_NUMBER:
470 return ConsumeNumber();
471 case T_BOOL_TRUE:
472 case T_BOOL_FALSE:
473 case T_NULL:
474 return ConsumeLiteral();
475 default:
476 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
477 return NULL;
478 }
479 }
480
481 Value* JSONParser::ConsumeDictionary() {
482 if (*pos_ != '{') {
483 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
484 return NULL;
485 }
486
487 StackMarker depth_check(&stack_depth_);
488 if (depth_check.IsTooDeep()) {
489 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
490 return NULL;
491 }
492
493 scoped_ptr<DictionaryValue> dict(new DictionaryValue);
494
495 NextChar();
496 Token token = GetNextToken();
497 while (token != T_OBJECT_END) {
498 if (token != T_STRING) {
499 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
500 return NULL;
501 }
502
503 // First consume the key.
504 StringBuilder key;
505 if (!ConsumeStringRaw(&key)) {
506 return NULL;
507 }
508
509 // Read the separator.
510 NextChar();
511 token = GetNextToken();
512 if (token != T_OBJECT_PAIR_SEPARATOR) {
513 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
514 return NULL;
515 }
516
517 // The token is the value. Ownership transfers to |dict|.
518 NextChar();
519 Value* value = ParseNextToken();
520 if (!value) {
521 return NULL;
522 }
523
524 dict->SetWithoutPathExpansion(key.AsString(), value);
525
526 NextChar();
527 token = GetNextToken();
528 if (token == T_LIST_SEPARATOR) {
529 NextChar();
530 token = GetNextToken();
531 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
532 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
533 return NULL;
534 }
535 } else if (token != T_OBJECT_END) {
536 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
537 return NULL;
538 }
539 }
540
541 if (token != T_OBJECT_END)
542 return NULL;
543
544 return dict.release();
545 }
546
547 Value* JSONParser::ConsumeList() {
548 if (*pos_ != '[') {
549 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
550 return NULL;
551 }
552
553 StackMarker depth_check(&stack_depth_);
554 if (depth_check.IsTooDeep()) {
555 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
556 return NULL;
557 }
558
559 scoped_ptr<ListValue> list(new ListValue);
560
561 NextChar();
562 Token token = GetNextToken();
563 while (token != T_ARRAY_END) {
564 Value* item = ParseToken(token);
565 if (!item) {
566 // ReportError from deeper level.
567 return NULL;
568 }
569
570 list->Append(item);
571
572 NextChar();
573 token = GetNextToken();
574 if (token == T_LIST_SEPARATOR) {
575 NextChar();
576 token = GetNextToken();
577 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
578 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
579 return NULL;
580 }
581 } else if (token != T_ARRAY_END) {
582 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
583 return NULL;
584 }
585 }
586
587 if (token != T_ARRAY_END)
588 return NULL;
589
590 return list.release();
591 }
592
593 Value* JSONParser::ConsumeString() {
594 StringBuilder string;
595 if (!ConsumeStringRaw(&string))
596 return NULL;
597
598 // Create the Value representation, either using a hidden root, if configured
599 // to do so, and the string can be represented by StringPiece.
600 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
601 return new JSONStringValue(string.AsStringPiece());
602 } else {
603 if (string.CanBeStringPiece())
604 string.Convert();
605 return new StringValue(string.AsString());
606 }
607 }
608
609 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
610 if (*pos_ != '"') {
611 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
612 return false;
613 }
614
615 // StringBuilder will internally build a StringPiece unless a UTF-16
616 // conversion occurs, at which point it will perform a copy into a
617 // std::string.
618 StringBuilder string(NextChar());
619
620 int length = end_pos_ - start_pos_;
621 int32 next_char = 0;
622
623 DCHECK_EQ(*pos_, *(start_pos_ + index_));
Mark Mentovai 2012/05/08 20:19:41 Why the *s?
Robert Sesek 2012/05/15 16:57:51 Debugging code removed.
624
625 while (CanConsume(1)) {
626 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
627 CBU8_NEXT(start_pos_, index_, length, next_char);
628 if (next_char < 0 || !IsValidCharacter(next_char)) {
629 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
630 return false;
631 }
632
633 // If this character is an escape sequence...
634 if (next_char == '\\') {
635 // The input string will be adjusted (either by combining the two
636 // characters of an encoded escape sequence, or with a UTF conversion),
637 // so using StringPiece isn't possible -- force a conversion.
638 string.Convert();
639
640 if (!CanConsume(1)) {
641 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
642 return false;
643 }
644
645 switch (*NextChar()) {
646 // Allowed esape sequences:
647 case 'x': { // UTF-8 sequence.
648 if (!CanConsume(2)) {
649 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
650 return false;
651 }
652
653 int hex_digit = 0;
654 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
655 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
656 return false;
657 }
658 NextChar();
659
660 if (hex_digit < kExtendedASCIIStart)
661 string.Append(hex_digit);
662 else
663 DecodeUTF8(hex_digit, &string);
Mark Mentovai 2012/05/08 20:19:41 How is this supposed to work? Why don’t I see it i
Robert Sesek 2012/05/15 16:57:51 Documented. I don't want to remove this now (witho
664 break;
665 }
666 case 'u': { // UTF-16 sequence.
667 // UTF units are of the form \uXXXX.
668 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
669 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
670 return false;
671 }
672
673 // Skip the 'u'.
674 NextChar();
675
676 std::string utf8_units;
677 if (!DecodeUTF16(&utf8_units)) {
678 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
679 return false;
680 }
681
682 string.AppendString(utf8_units);
683 break;
684 }
685 case '"':
686 string.Append('"');
687 break;
688 case '\\':
689 string.Append('\\');
690 break;
691 case '/':
692 string.Append('/');
693 break;
694 case 'b':
695 string.Append('\b');
696 break;
697 case 'f':
698 string.Append('\f');
699 break;
700 case 'n':
701 string.Append('\n');
702 break;
703 case 'r':
704 string.Append('\r');
705 break;
706 case 't':
707 string.Append('\t');
708 break;
709 case 'v': // Not listed as valid escape sequence in the RFC.
710 string.Append('\v');
711 break;
712 // All other escape squences are illegal.
713 default:
714 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
715 return false;
716 }
717 } else if (next_char == '"') {
718 --index_; // Rewind by one because of CBU8_NEXT.
719 out->Swap(&string);
720 return true;
721 } else {
722 if (next_char < kExtendedASCIIStart)
723 string.Append(next_char);
724 else
725 DecodeUTF8(next_char, &string);
726 }
727 }
728
729 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
730 return false;
731 }
732
733 // Entry is at the first X in \uXXXX.
734 bool JSONParser::DecodeUTF16(std::string* dest_string) {
735 if (!CanConsume(4))
736 return false;
737
738 // This is a 32-bit field because the shift operations in the
739 // conversion process below cause MSVC to error about "data loss."
740 // This only stores UTF-16 code units, though.
741 // Consume the UTF-16 code unit, which may be a high surrogate.
742 int code_unit16_high = 0;
743 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
744 return false;
745
746 // Only add 3, not 4, because at the end of this iteration, the parser has
747 // finished working with the last digit of the UTF sequence, meaning that
748 // the next spin of the loop will advance to the next byte.
749 NextNChars(3);
750
751 // If this is a high surrogate, consume the next code unit to get the
752 // low surrogate.
753 int code_unit16_low = 0;
754 if (CBU16_IS_SURROGATE(code_unit16_high)) {
755 // Make sure this is the high surrogate. If not, it's an encoding
756 // error.
757 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
758 return false;
759
760 // Make sure that the token has more characters to consume the
761 // lower surrogate.
762 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
763 return false;
764 if (*NextChar() != '\\' || *NextChar() != 'u')
765 return false;
766
767 NextChar(); // Read past 'u'.
768 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
769 return false;
770
771 NextNChars(3);
772
773 if (!CBU16_IS_SURROGATE(code_unit16_low) ||
774 !CBU16_IS_TRAIL(code_unit16_low)) {
Mark Mentovai 2012/05/08 20:19:41 CBU16_IS_TRAIL implies CBU16_IS_SURROGATE, you onl
Robert Sesek 2012/05/15 16:57:51 Done.
775 return false;
776 }
777 } else if (!CBU16_IS_SINGLE(code_unit16_high)) {
Mark Mentovai 2012/05/08 20:19:41 CBU16_IS_SINGLE is defined as !CBU16_IS_SURROGATE,
Robert Sesek 2012/05/15 16:57:51 Done.
778 // If this is not a code point, it's an encoding error.
779 return false;
780 }
781
782 // Convert the UTF-16 code units to a code point and then to a UTF-8
783 // code unit sequence.
784 char code_point[8] = { 0 };
785 size_t offset = 0;
786 if (!code_unit16_low) {
Mark Mentovai 2012/05/08 20:19:41 Rather than rechecking this, why don’t you do it i
Robert Sesek 2012/05/15 16:57:51 Done.
787 CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
788 } else {
789 uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
790 code_unit16_low);
791 offset = 0;
792 CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
Mark Mentovai 2012/05/08 20:19:41 And the same for this, except you’d put it in the
Robert Sesek 2012/05/15 16:57:51 Done.
793 }
794 dest_string->append(code_point);
795 return true;
796 }
797
798 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
799 // Anything outside of the basic ASCII plane will need to be decomposed from
800 // int32 to a multi-byte sequence.
801 if (point < kExtendedASCIIStart) {
802 dest->Append(point);
803 } else {
804 char utf8_units[4] = { 0 };
805 int offset = 0;
806 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
807 dest->Convert();
808 dest->AppendString(utf8_units);
809 }
810 }
811
812 Value* JSONParser::ConsumeNumber() {
813 const char* num_start = pos_;
814 const int start_index = index_;
815 int end_index = start_index;
816
817 if (*pos_ == '-')
818 NextChar();
819
820 if (!ReadInt(false)) {
821 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
822 return NULL;
823 }
824 end_index = index_;
825
826 // The optional faction part.
Mark Mentovai 2012/05/08 20:19:41 fraction
Robert Sesek 2012/05/15 16:57:51 Done.
827 if (*pos_ == '.') {
828 if (!CanConsume(1)) {
829 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
830 return NULL;
831 }
832 NextChar();
833 if (!ReadInt(true)) {
834 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
835 return NULL;
836 }
837 end_index = index_;
838 }
839
840 // Optional exponent part.
841 if (*pos_ == 'e' || *pos_ == 'E') {
842 NextChar();
843 if (*pos_ == '-' || *pos_ == '+')
844 NextChar();
845 if (!ReadInt(true)) {
846 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
847 return NULL;
848 }
849 end_index = index_;
850 }
851
852 // ReadInt is greedy because numbers have no easily detectable sentinel,
853 // so save off where the parser should be on exit (see Consume invariant at
854 // the top of the header), then make sure the next token is one which is
855 // valid.
856 const char* exit_pos = pos_ - 1;
857 int exit_index = index_ - 1;
858
859 switch (GetNextToken()) {
860 case T_OBJECT_END:
861 case T_ARRAY_END:
862 case T_LIST_SEPARATOR:
863 case T_END_OF_INPUT:
864 break;
865 default:
866 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
867 return NULL;
868 }
869
870 pos_ = exit_pos;
871 index_ = exit_index;
872
873 StringPiece num_string(num_start, end_index - start_index);
874
875 int num_int;
876 if (StringToInt(num_string, &num_int))
877 return Value::CreateIntegerValue(num_int);
878
879 double num_double;
880 if (base::StringToDouble(num_string.as_string(), &num_double) &&
881 IsFinite(num_double)) {
882 return Value::CreateDoubleValue(num_double);
883 }
884
885 return NULL;
886 }
887
888 bool JSONParser::ReadInt(bool allow_leading_zeros) {
889 char first = *pos_;
890 int len = 0;
891
892 char c = first;
893 while (CanConsume(1) && IsAsciiDigit(c)) {
894 c = *NextChar();
895 ++len;
896 }
897
898 if (len == 0)
899 return false;
900
901 if (!allow_leading_zeros && len > 1 && first == '0')
902 return false;
903
904 return true;
905 }
906
907 Value* JSONParser::ConsumeLiteral() {
908 switch (*pos_) {
909 case 't':
Mark Mentovai 2012/05/08 20:19:41 I’d be more comfortable having kTrueLiteral[] = "t
Robert Sesek 2012/05/15 16:57:51 Done.
910 if (!CanConsume(3) || !StringsAreEqual(pos_, "true", 4)) {
911 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
912 return NULL;
913 }
914 NextNChars(3);
915 return Value::CreateBooleanValue(true);
916 case 'f':
917 if (!CanConsume(4) || !StringsAreEqual(pos_, "false", 5)) {
918 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
919 return NULL;
920 }
921 NextNChars(4);
922 return Value::CreateBooleanValue(false);
923 case 'n':
924 if (!CanConsume(3) || !StringsAreEqual(pos_, "null", 4)) {
925 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
926 return NULL;
927 }
928 NextNChars(3);
929 return Value::CreateNullValue();
930 default:
931 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
932 return NULL;
933 }
934 }
935
936 // static
937 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
938 return strncmp(one, two, len) == 0;
939 }
940
941 void JSONParser::ReportError(JSONReader::JsonParseError code,
942 int column_adjust) {
943 error_code_ = code;
944 error_line_ = line_number_;
945 error_column_ = index_ - index_last_line_ + column_adjust;
946 }
947
948 // static
949 std::string JSONParser::FormatErrorMessage(int line, int column,
950 const std::string& description) {
951 if (line || column) {
Mark Mentovai 2012/05/08 20:19:41 Do you ever have !line && column, or the other way
Robert Sesek 2012/05/15 16:57:51 No, but one could be zero.
952 return StringPrintf("Line: %i, column: %i, %s",
953 line, column, description.c_str());
954 }
955 return description;
956 }
957
958 } // namespace internal
959 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698