Index: base/json/json_parser.cc |
diff --git a/base/json/json_parser.cc b/base/json/json_parser.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..de2f55fabfe4fa0fd413f8eb343c5b8b559929b9 |
--- /dev/null |
+++ b/base/json/json_parser.cc |
@@ -0,0 +1,972 @@ |
+// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "base/json/json_parser.h" |
+ |
+#include "base/float_util.h" |
+#include "base/logging.h" |
+#include "base/memory/scoped_ptr.h" |
+#include "base/string_number_conversions.h" |
+#include "base/string_util.h" |
+#include "base/stringprintf.h" |
+#include "base/third_party/icu/icu_utf.h" |
+#include "base/utf_string_conversion_utils.h" |
+#include "base/utf_string_conversions.h" |
+#include "base/values.h" |
+ |
+namespace base { |
+namespace internal { |
+ |
+namespace { |
+ |
+// Nesting limit for lists and dictionaries: StackMarker::IsTooDeep() reports |
+// true once the tracked depth reaches this value. |
+const int kStackMaxDepth = 100; |
+ |
+// Code points below this value are appended to strings as single bytes; |
+// values at or above it are expanded to a multi-byte sequence in DecodeUTF8(). |
+const int32 kExtendedASCIIStart = 0x80; |
+ |
+// DictionaryHiddenRootValue and ListHiddenRootValue (below) keep the JSON |
+// input text alive for parse trees whose string tokens are stored as |
+// StringPiece instead of std::string. This optimization avoids about 2/3rds |
+// of string memory copies. Construction swaps both the input text and the |
+// contents of the real root value into the new instance. |
+// NOTE(review): the StringPieces remain valid only if std::string::swap() |
+// leaves the character buffer at the same address -- confirm this holds for |
+// every supported standard library. |
+class DictionaryHiddenRootValue : public base::DictionaryValue { |
+ public: |
+  // |root| must be a dictionary. Its contents and the bytes of |json| are |
+  // swapped into this object; neither pointer is stored or deleted here. |
+  DictionaryHiddenRootValue(std::string* json, Value* root) { |
+    DCHECK(root->IsType(Value::TYPE_DICTIONARY)); |
+    DictionaryValue* const real_root = static_cast<DictionaryValue*>(root); |
+    DictionaryValue::Swap(real_root); |
+    json->swap(json_); |
+  } |
+ |
+  // Exchanges contents with |other| without letting any JSONStringValue |
+  // escape: |other| receives a deep copy of this dictionary. |
+  virtual void Swap(DictionaryValue* other) OVERRIDE { |
+    DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; |
+ |
+    // The deep copy turns every JSONStringValue into a std::string-backed |
+    // value. After this swap |other| holds that copy and |detached| holds |
+    // what |other| used to contain. |
+    scoped_ptr<base::DictionaryValue> detached(DeepCopy()); |
+    detached->Swap(other); |
+ |
+    // Drop the StringPiece-backed children and the text they pointed into, |
+    // then adopt the contents that came from |other|. |
+    Clear(); |
+    json_.clear(); |
+    DictionaryValue::Swap(detached.get()); |
+  } |
+ |
+  // DictionaryValue::Remove is not overridden because it just calls through |
+  // to the method below. |
+ |
+  // Removes |key|. When the caller takes ownership through |out|, it receives |
+  // a deep copy so that no JSONStringValue leaves this root. |
+  virtual bool RemoveWithoutPathExpansion(const std::string& key, |
+                                          Value** out) OVERRIDE { |
+    if (out) { |
+      DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; |
+ |
+      Value* removed_raw = NULL; |
+      if (!DictionaryValue::RemoveWithoutPathExpansion(key, &removed_raw)) |
+        return false; |
+ |
+      // The removed subtree is destroyed once its copy has been handed out. |
+      scoped_ptr<Value> removed(removed_raw); |
+      *out = removed->DeepCopy(); |
+      return true; |
+    } |
+ |
+    // Nobody keeps the removed value, so the base implementation suffices. |
+    return DictionaryValue::RemoveWithoutPathExpansion(key, out); |
+  } |
+ |
+ private: |
+  // The JSON text that JSONStringValue children point into. |
+  std::string json_; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); |
+}; |
+ |
+// List-rooted owner of the JSON input text that JSONStringValue children |
+// point into. Construction swaps the input text and the contents of the real |
+// root list into the new instance. |
+class ListHiddenRootValue : public base::ListValue { |
+ public: |
+  // |root| must be a list. Its contents and the bytes of |json| are swapped |
+  // into this object; neither pointer is stored or deleted here. |
+  ListHiddenRootValue(std::string* json, Value* root) { |
+    DCHECK(root->IsType(Value::TYPE_LIST)); |
+    ListValue* const real_root = static_cast<ListValue*>(root); |
+    ListValue::Swap(real_root); |
+    json->swap(json_); |
+  } |
+ |
+  // Exchanges contents with |other| without letting any JSONStringValue |
+  // escape: |other| receives a deep copy of this list. |
+  virtual void Swap(ListValue* other) OVERRIDE { |
+    DVLOG(1) << "Swap()ing a ListValue inefficiently."; |
+ |
+    // The deep copy turns every JSONStringValue into a std::string-backed |
+    // value. After this swap |other| holds that copy and |detached| holds |
+    // what |other| used to contain. |
+    scoped_ptr<base::ListValue> detached(DeepCopy()); |
+    detached->Swap(other); |
+ |
+    // Drop the StringPiece-backed children and the text they pointed into, |
+    // then adopt the contents that came from |other|. |
+    Clear(); |
+    json_.clear(); |
+    ListValue::Swap(detached.get()); |
+  } |
+ |
+  // Removes the element at |index|. When the caller takes ownership through |
+  // |out|, it receives a deep copy so that no JSONStringValue leaves this |
+  // root. |
+  virtual bool Remove(size_t index, Value** out) OVERRIDE { |
+    if (out) { |
+      DVLOG(1) << "Remove()ing from a ListValue inefficiently."; |
+ |
+      Value* removed_raw = NULL; |
+      if (!ListValue::Remove(index, &removed_raw)) |
+        return false; |
+ |
+      // The removed subtree is destroyed once its copy has been handed out. |
+      scoped_ptr<Value> removed(removed_raw); |
+      *out = removed->DeepCopy(); |
+      return true; |
+    } |
+ |
+    // Nobody keeps the removed value, so the base implementation suffices. |
+    return ListValue::Remove(index, out); |
+  } |
+ |
+ private: |
+  // The JSON text that JSONStringValue children point into. |
+  std::string json_; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); |
+}; |
+ |
+// A string Value backed by a StringPiece into the JSON input rather than by |
+// its own copy of the characters. It may only live underneath one of the |
+// hidden roots above; anywhere else the referenced text is not guaranteed to |
+// outlive it. |
+class JSONStringValue : public base::Value { |
+ public: |
+  explicit JSONStringValue(const base::StringPiece& piece) |
+      : Value(TYPE_STRING), |
+        string_piece_(piece) { |
+  } |
+ |
+  // Value: |
+  // Both GetAsString() overloads copy the referenced characters out and |
+  // always succeed. |
+  virtual bool GetAsString(std::string* out_value) const OVERRIDE { |
+    string_piece_.CopyToString(out_value); |
+    return true; |
+  } |
+  virtual bool GetAsString(string16* out_value) const OVERRIDE { |
+    *out_value = UTF8ToUTF16(string_piece_); |
+    return true; |
+  } |
+  // The copy is created from an owned std::string, so it does not depend on |
+  // the JSON input. |
+  virtual Value* DeepCopy() const OVERRIDE { |
+    return Value::CreateStringValue(string_piece_.as_string()); |
+  } |
+  // Equal to any string-typed Value holding the same characters. |
+  virtual bool Equals(const Value* other) const OVERRIDE { |
+    if (!other->IsType(TYPE_STRING)) |
+      return false; |
+ |
+    std::string other_string; |
+    if (!other->GetAsString(&other_string)) |
+      return false; |
+ |
+    return StringPiece(other_string) == string_piece_; |
+  } |
+ |
+ private: |
+  // The location in the original input stream. |
+  base::StringPiece string_piece_; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(JSONStringValue); |
+}; |
+ |
+// Scoped guard against runaway recursion ("stack overflow"): bumps the |
+// caller's depth counter for as long as the marker is alive. |
+class StackMarker { |
+ public: |
+  explicit StackMarker(int* depth) : depth_(depth) { |
+    *depth_ += 1; |
+    DCHECK_LE(*depth_, kStackMaxDepth); |
+  } |
+  ~StackMarker() { |
+    *depth_ -= 1; |
+  } |
+ |
+  // True once the counter has reached kStackMaxDepth. |
+  bool IsTooDeep() const { |
+    return !(*depth_ < kStackMaxDepth); |
+  } |
+ |
+ private: |
+  // Counter owned by the caller; never reseated. |
+  int* const depth_; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(StackMarker); |
+}; |
+ |
+} // namespace |
+ |
+// Stores |options| and zeroes all cursor, position and error state. Parse() |
+// re-initializes that state for each input. |
+JSONParser::JSONParser(int options) |
+    : options_(options), |
+      start_pos_(NULL), |
+      pos_(NULL), |
+      end_pos_(NULL), |
+      index_(0), |
+      stack_depth_(0), |
+      line_number_(0), |
+      index_last_line_(0), |
+      error_code_(JSONReader::JSON_NO_ERROR), |
+      error_line_(0), |
+      error_column_(0) { |
+} |
+ |
+// Nothing to release: the constructor allocates nothing and only holds raw |
+// pointers into the input text. |
+JSONParser::~JSONParser() { |
+} |
+ |
+// Parses |input| as one JSON document. Returns the root Value, or NULL when |
+// parsing fails; on most failure paths the error code, line and column are |
+// recorded through ReportError(). |
+// |
+// Unless JSON_DETACHABLE_CHILDREN is set, the parser works on a private copy |
+// of |input| so that string tokens can be stored as StringPieces into it; |
+// dictionary and list roots are then wrapped in a hidden root that takes |
+// over that copy. |
+Value* JSONParser::Parse(const std::string& input) { |
+  // TODO(rsesek): Windows has problems with StringPiece/hidden roots. Fix |
+  // <http://crbug.com/126107> when my Windows box arrives. |
+#if defined(OS_WIN) |
+  options_ |= JSON_DETACHABLE_CHILDREN; |
+#endif |
+ |
+  std::string input_copy; |
+  // If the children of a JSON root can be detached, then hidden roots cannot |
+  // be used, so do not bother copying the input because StringPiece will not |
+  // be used anywhere. |
+  if (!(options_ & JSON_DETACHABLE_CHILDREN)) { |
+    input_copy = input; |
+    start_pos_ = input_copy.data(); |
+  } else { |
+    start_pos_ = input.data(); |
+  } |
+  pos_ = start_pos_; |
+  end_pos_ = start_pos_ + input.length(); |
+  index_ = 0; |
+  line_number_ = 1; |
+  index_last_line_ = 0; |
+ |
+  error_code_ = JSONReader::JSON_NO_ERROR; |
+  error_line_ = 0; |
+  error_column_ = 0; |
+ |
+  // When the input JSON string starts with a UTF-8 Byte-Order-Mark |
+  // <0xEF 0xBB 0xBF>, advance the start position to avoid the |
+  // ParseNextToken function mis-treating a Unicode BOM as an invalid |
+  // character and returning NULL. |
+  if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && |
+      static_cast<uint8>(*(pos_ + 1)) == 0xBB && |
+      static_cast<uint8>(*(pos_ + 2)) == 0xBF) { |
+    NextNChars(3); |
+  } |
+ |
+  // Parse the first and all subsequent tokens. |
+  scoped_ptr<Value> root(ParseNextToken()); |
+  if (!root.get()) |
+    return NULL; |
+ |
+  // Make sure the input stream is at an end. The cursor may still be on the |
+  // last character of the root value, in which case step over it and check |
+  // again. |
+  if (GetNextToken() != T_END_OF_INPUT) { |
+    if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { |
+      ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); |
+      return NULL; |
+    } |
+  } |
+ |
+  // Dictionaries and lists can contain JSONStringValues, so wrap them in a |
+  // hidden root. |
+  if (!(options_ & JSON_DETACHABLE_CHILDREN)) { |
+    if (root->IsType(Value::TYPE_DICTIONARY)) { |
+      // The hidden root only swaps the contents out of |root|; it neither |
+      // stores nor deletes the pointer. Keep ownership in the scoped_ptr so |
+      // the emptied dictionary is freed on return instead of leaking. |
+      return new DictionaryHiddenRootValue(&input_copy, root.get()); |
+    } else if (root->IsType(Value::TYPE_LIST)) { |
+      // Same ownership reasoning as for the dictionary case above. |
+      return new ListHiddenRootValue(&input_copy, root.get()); |
+    } else if (root->IsType(Value::TYPE_STRING)) { |
+      // A string type could be a JSONStringValue, but because there's no |
+      // corresponding HiddenRootValue, the memory will be lost. Deep copy to |
+      // preserve it. |
+      return root->DeepCopy(); |
+    } |
+  } |
+ |
+  // All other values can be returned directly. |
+  return root.release(); |
+} |
+ |
+// Returns the error code last stored by ReportError(), or JSON_NO_ERROR if |
+// none has been recorded since construction or the start of the last Parse(). |
+JSONReader::JsonParseError JSONParser::error_code() const { |
+  return error_code_; |
+} |
+ |
+std::string JSONParser::GetErrorMessage() const { |
+ return FormatErrorMessage(error_line_, error_column_, |
+ JSONReader::ErrorCodeToString(error_code_)); |
+} |
+ |
+// StringBuilder /////////////////////////////////////////////////////////////// |
+ |
+// Creates an empty builder with no source position and no owned string. |
+JSONParser::StringBuilder::StringBuilder() |
+    : pos_(NULL), |
+      length_(0), |
+      string_(NULL) { |
+} |
+ |
+// Creates a builder whose text starts at |pos|. No characters are copied |
+// until Convert() is called; until then Append() only grows the length. |
+JSONParser::StringBuilder::StringBuilder(const char* pos) |
+    : pos_(pos), |
+      length_(0), |
+      string_(NULL) { |
+} |
+ |
+// Exchanges the complete state (source position, length and owned string) |
+// with |other|. |
+void JSONParser::StringBuilder::Swap(StringBuilder* other) { |
+  std::swap(pos_, other->pos_); |
+  std::swap(length_, other->length_); |
+  std::swap(string_, other->string_); |
+} |
+ |
+// Frees the owned string, if Convert() ever created one. |
+JSONParser::StringBuilder::~StringBuilder() { |
+  delete string_; |
+} |
+ |
+// Adds one ASCII character. Before Convert() this merely extends the length |
+// of the referenced source range; afterwards the character is pushed onto the |
+// owned string. |
+void JSONParser::StringBuilder::Append(const char& c) { |
+  DCHECK_GE(c, 0); |
+  DCHECK_LT(c, 128); |
+ |
+  if (!string_) { |
+    ++length_; |
+    return; |
+  } |
+  string_->push_back(c); |
+} |
+ |
+// Appends |str| to the owned string. Convert() must have been called first; |
+// this is only enforced by the DCHECK. |
+void JSONParser::StringBuilder::AppendString(const std::string& str) { |
+  DCHECK(string_); |
+  string_->append(str); |
+} |
+ |
+// Switches to an owned std::string seeded with the |length_| characters |
+// starting at |pos_|. Does nothing if the switch already happened. |
+void JSONParser::StringBuilder::Convert() { |
+  if (!string_) |
+    string_ = new std::string(pos_, length_); |
+} |
+ |
+// True while no owned string exists, i.e. Convert() has not run. |
+bool JSONParser::StringBuilder::CanBeStringPiece() const { |
+  return string_ == NULL; |
+} |
+ |
+// Returns the referenced source range, or an empty StringPiece once the |
+// builder has been converted to an owned string. |
+StringPiece JSONParser::StringBuilder::AsStringPiece() { |
+  return string_ ? StringPiece() : StringPiece(pos_, length_); |
+} |
+ |
+// Returns the owned string, creating it from the source range on first use. |
+const std::string& JSONParser::StringBuilder::AsString() { |
+  // Convert() is a no-op when the owned string already exists. |
+  Convert(); |
+  return *string_; |
+} |
+ |
+// JSONParser private ////////////////////////////////////////////////////////// |
+ |
+// Returns true if at least |length| bytes remain between the cursor and the |
+// end of the input. |
+inline bool JSONParser::CanConsume(int length) { |
+  // Compare against the remaining distance instead of computing |
+  // |pos_ + length|: forming a pointer beyond one-past-the-end of the buffer |
+  // is undefined behaviour even if it is never dereferenced. |
+  return length <= end_pos_ - pos_; |
+} |
+ |
+// Advances the cursor by one byte and returns the new position. |
+const char* JSONParser::NextChar() { |
+  DCHECK(CanConsume(1)); |
+  ++index_; |
+  return ++pos_; |
+} |
+ |
+// Advances the cursor by |n| bytes. |
+void JSONParser::NextNChars(int n) { |
+  DCHECK(CanConsume(n)); |
+  pos_ += n; |
+  index_ += n; |
+} |
+ |
+// Skips whitespace and comments, then classifies the character under the |
+// cursor. The cursor itself is not moved past that character. |
+JSONParser::Token JSONParser::GetNextToken() { |
+  EatWhitespaceAndComments(); |
+  if (!CanConsume(1)) |
+    return T_END_OF_INPUT; |
+ |
+  const char current = *pos_; |
+ |
+  // A number starts with a digit or a minus sign. |
+  if (current == '-' || (current >= '0' && current <= '9')) |
+    return T_NUMBER; |
+ |
+  switch (current) { |
+    case '{': |
+      return T_OBJECT_BEGIN; |
+    case '}': |
+      return T_OBJECT_END; |
+    case '[': |
+      return T_ARRAY_BEGIN; |
+    case ']': |
+      return T_ARRAY_END; |
+    case '"': |
+      return T_STRING; |
+    case 't': |
+      return T_BOOL_TRUE; |
+    case 'f': |
+      return T_BOOL_FALSE; |
+    case 'n': |
+      return T_NULL; |
+    case ',': |
+      return T_LIST_SEPARATOR; |
+    case ':': |
+      return T_OBJECT_PAIR_SEPARATOR; |
+    default: |
+      break; |
+  } |
+  return T_INVALID_TOKEN; |
+} |
+ |
+// Advances the cursor past spaces, tabs, line breaks and comments, keeping |
+// |line_number_| and |index_last_line_| up to date for error reporting. |
+// Stops at the first other character, at the end of input, or when |
+// EatComment() fails. |
+void JSONParser::EatWhitespaceAndComments() { |
+  while (pos_ < end_pos_) { |
+    switch (*pos_) { |
+      case '\r': |
+      case '\n': |
+        index_last_line_ = index_; |
+        // Count "\r\n" as a single line break: skip the increment for a '\n' |
+        // that directly follows a '\r', which was already counted. |
+        if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r')) |
+          ++line_number_; |
+        // Fall through. |
+      case ' ': |
+      case '\t': |
+        NextChar(); |
+        break; |
+      case '/': |
+        if (!EatComment()) |
+          return; |
+        break; |
+      default: |
+        return; |
+    } |
+  } |
+} |
+ |
+// Consumes a "//" or "/* */" comment starting at the cursor. |
+// Returns true when a complete comment was consumed: for a line comment the |
+// cursor is left on the terminating newline, for a block comment on the byte |
+// after the closing "*/". Returns false if the cursor is not on a comment or |
+// the comment runs to the end of the input. |
+// NOTE(review): the loops read the byte at |end_pos_| when they reach the end |
+// of the input; this presumably relies on the std::string terminator -- |
+// confirm. |
+bool JSONParser::EatComment() { |
+  // Check the bounds before looking at the character. |
+  if (!CanConsume(1) || *pos_ != '/') |
+    return false; |
+ |
+  char next_char = *NextChar(); |
+  if (next_char == '/') { |
+    // Single line comment, read to newline. |
+    while (CanConsume(1)) { |
+      next_char = *NextChar(); |
+      if (next_char == '\n' || next_char == '\r') |
+        return true; |
+    } |
+  } else if (next_char == '*') { |
+    // Block comment, read until end marker. Advance one character at a time |
+    // and remember the previous one, so that a '*' which is not followed by |
+    // '/' can still start the end marker (e.g. the second '*' in "/***/"). |
+    // |previous_char| starts out empty so that "/*/" is not taken as closed. |
+    char previous_char = '\0'; |
+    while (CanConsume(1)) { |
+      next_char = *NextChar(); |
+      if (previous_char == '*' && next_char == '/') { |
+        // EatWhitespaceAndComments will inspect pos_, which will still be on |
+        // the last / of the comment, so advance once more (which may also be |
+        // end of input). |
+        NextChar(); |
+        return true; |
+      } |
+      previous_char = next_char; |
+    } |
+ |
+    // If the comment is unterminated, the cursor is now at the end of the |
+    // input and GetNextToken will report T_END_OF_INPUT. |
+  } |
+ |
+  return false; |
+} |
+ |
+// Classifies the next token and parses the value that starts there. |
+Value* JSONParser::ParseNextToken() { |
+  const Token next_token = GetNextToken(); |
+  return ParseToken(next_token); |
+} |
+ |
+// Dispatches to the Consume* routine matching |token|. Any token that cannot |
+// start a value is reported as JSON_UNEXPECTED_TOKEN and yields NULL. |
+Value* JSONParser::ParseToken(Token token) { |
+  switch (token) { |
+    case T_OBJECT_BEGIN: |
+      return ConsumeDictionary(); |
+    case T_ARRAY_BEGIN: |
+      return ConsumeList(); |
+    case T_STRING: |
+      return ConsumeString(); |
+    case T_NUMBER: |
+      return ConsumeNumber(); |
+    case T_BOOL_TRUE: |
+    case T_BOOL_FALSE: |
+    case T_NULL: |
+      return ConsumeLiteral(); |
+    default: |
+      break; |
+  } |
+ |
+  ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
+  return NULL; |
+} |
+ |
+// Parses a dictionary starting at the '{' under the cursor. Returns a new |
+// DictionaryValue, or NULL on error. On success the cursor is left on the |
+// closing '}'. |
+Value* JSONParser::ConsumeDictionary() { |
+  if (*pos_ != '{') { |
+    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
+    return NULL; |
+  } |
+ |
+  // Counts this nesting level for as long as the function runs. |
+  StackMarker depth_check(&stack_depth_); |
+  if (depth_check.IsTooDeep()) { |
+    ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); |
+    return NULL; |
+  } |
+ |
+  scoped_ptr<DictionaryValue> dict(new DictionaryValue); |
+ |
+  // Step over the '{'. |
+  NextChar(); |
+  Token token = GetNextToken(); |
+  while (token != T_OBJECT_END) { |
+    if (token != T_STRING) { |
+      ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); |
+      return NULL; |
+    } |
+ |
+    // First consume the key. |
+    StringBuilder key; |
+    if (!ConsumeStringRaw(&key)) { |
+      return NULL; |
+    } |
+ |
+    // Read the separator. |
+    NextChar(); |
+    token = GetNextToken(); |
+    if (token != T_OBJECT_PAIR_SEPARATOR) { |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+      return NULL; |
+    } |
+ |
+    // The token is the value. Ownership transfers to |dict|. |
+    NextChar(); |
+    Value* value = ParseNextToken(); |
+    if (!value) { |
+      return NULL; |
+    } |
+ |
+    dict->SetWithoutPathExpansion(key.AsString(), value); |
+ |
+    // Step past the value; what follows must be ',' or '}'. |
+    NextChar(); |
+    token = GetNextToken(); |
+    if (token == T_LIST_SEPARATOR) { |
+      NextChar(); |
+      token = GetNextToken(); |
+      // A '}' directly after ',' is a trailing comma. |
+      if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { |
+        ReportError(JSONReader::JSON_TRAILING_COMMA, 1); |
+        return NULL; |
+      } |
+    } else if (token != T_OBJECT_END) { |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); |
+      return NULL; |
+    } |
+  } |
+ |
+  // The loop above only exits with token == T_OBJECT_END, so this check |
+  // cannot fire. |
+  if (token != T_OBJECT_END) |
+    return NULL; |
+ |
+  return dict.release(); |
+} |
+ |
+// Parses a list starting at the '[' under the cursor. Returns a new |
+// ListValue, or NULL on error. On success the cursor is left on the closing |
+// ']'. |
+Value* JSONParser::ConsumeList() { |
+  if (*pos_ != '[') { |
+    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
+    return NULL; |
+  } |
+ |
+  // Counts this nesting level for as long as the function runs. |
+  StackMarker depth_check(&stack_depth_); |
+  if (depth_check.IsTooDeep()) { |
+    ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); |
+    return NULL; |
+  } |
+ |
+  scoped_ptr<ListValue> list(new ListValue); |
+ |
+  // Step over the '['. |
+  NextChar(); |
+  Token token = GetNextToken(); |
+  while (token != T_ARRAY_END) { |
+    Value* item = ParseToken(token); |
+    if (!item) { |
+      // ReportError from deeper level. |
+      return NULL; |
+    } |
+ |
+    list->Append(item); |
+ |
+    // Step past the item; what follows must be ',' or ']'. |
+    NextChar(); |
+    token = GetNextToken(); |
+    if (token == T_LIST_SEPARATOR) { |
+      NextChar(); |
+      token = GetNextToken(); |
+      // A ']' directly after ',' is a trailing comma. |
+      if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { |
+        ReportError(JSONReader::JSON_TRAILING_COMMA, 1); |
+        return NULL; |
+      } |
+    } else if (token != T_ARRAY_END) { |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+      return NULL; |
+    } |
+  } |
+ |
+  // The loop above only exits with token == T_ARRAY_END, so this check |
+  // cannot fire. |
+  if (token != T_ARRAY_END) |
+    return NULL; |
+ |
+  return list.release(); |
+} |
+ |
+// Parses the string token under the cursor into a Value, or returns NULL if |
+// the token is malformed. |
+Value* JSONParser::ConsumeString() { |
+  StringBuilder builder; |
+  if (!ConsumeStringRaw(&builder)) |
+    return NULL; |
+ |
+  // The zero-copy representation needs both a string that is still a plain |
+  // range of the input and a hidden root to keep that input alive. |
+  const bool use_string_piece = |
+      builder.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN); |
+  if (use_string_piece) |
+    return new JSONStringValue(builder.AsStringPiece()); |
+ |
+  // AsString() converts the builder to an owned string when needed. |
+  return new StringValue(builder.AsString()); |
+} |
+ |
+// Parses the quoted string starting at the '"' under the cursor and swaps the |
+// result into |out|. Returns false after reporting an error if the string is |
+// malformed or unterminated; |out| is left untouched in that case. |
+bool JSONParser::ConsumeStringRaw(StringBuilder* out) { |
+  if (*pos_ != '"') { |
+    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
+    return false; |
+  } |
+ |
+  // StringBuilder will internally build a StringPiece unless a UTF-16 |
+  // conversion occurs, at which point it will perform a copy into a |
+  // std::string. |
+  StringBuilder string(NextChar()); |
+ |
+  int length = end_pos_ - start_pos_; |
+  int32 next_char = 0; |
+ |
+  while (CanConsume(1)) { |
+    pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement. |
+    CBU8_NEXT(start_pos_, index_, length, next_char); |
+    if (next_char < 0 || !IsValidCharacter(next_char)) { |
+      ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); |
+      return false; |
+    } |
+ |
+    // If this character is an escape sequence... |
+    if (next_char == '\\') { |
+      // The input string will be adjusted (either by combining the two |
+      // characters of an encoded escape sequence, or with a UTF conversion), |
+      // so using StringPiece isn't possible -- force a conversion. |
+      string.Convert(); |
+ |
+      if (!CanConsume(1)) { |
+        ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
+        return false; |
+      } |
+ |
+      switch (*NextChar()) { |
+        // Allowed escape sequences: |
+        case 'x': {  // UTF-8 sequence. |
+          // UTF-8 \x escape sequences are not allowed in the spec, but they |
+          // are supported here for backwards-compatibility with the old |
+          // parser. |
+          if (!CanConsume(2)) { |
+            ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); |
+            return false; |
+          } |
+ |
+          int hex_digit = 0; |
+          if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { |
+            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); |
+            return false; |
+          } |
+          NextChar(); |
+ |
+          if (hex_digit < kExtendedASCIIStart) |
+            string.Append(hex_digit); |
+          else |
+            DecodeUTF8(hex_digit, &string); |
+          break; |
+        } |
+        case 'u': {  // UTF-16 sequence. |
+          // UTF units are of the form \uXXXX. |
+          if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits. |
+            ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
+            return false; |
+          } |
+ |
+          // Skip the 'u'. |
+          NextChar(); |
+ |
+          std::string utf8_units; |
+          if (!DecodeUTF16(&utf8_units)) { |
+            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); |
+            return false; |
+          } |
+ |
+          string.AppendString(utf8_units); |
+          break; |
+        } |
+        case '"': |
+          string.Append('"'); |
+          break; |
+        case '\\': |
+          string.Append('\\'); |
+          break; |
+        case '/': |
+          string.Append('/'); |
+          break; |
+        case 'b': |
+          string.Append('\b'); |
+          break; |
+        case 'f': |
+          string.Append('\f'); |
+          break; |
+        case 'n': |
+          string.Append('\n'); |
+          break; |
+        case 'r': |
+          string.Append('\r'); |
+          break; |
+        case 't': |
+          string.Append('\t'); |
+          break; |
+        case 'v':  // Not listed as valid escape sequence in the RFC. |
+          string.Append('\v'); |
+          break; |
+        // All other escape sequences are illegal. |
+        default: |
+          ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
+          return false; |
+      } |
+    } else if (next_char == '"') { |
+      // Closing quote: hand the finished string to the caller. |
+      --index_;  // Rewind by one because of CBU8_NEXT. |
+      out->Swap(&string); |
+      return true; |
+    } else { |
+      if (next_char < kExtendedASCIIStart) |
+        string.Append(next_char); |
+      else |
+        DecodeUTF8(next_char, &string); |
+    } |
+  } |
+ |
+  // The input ended before a closing quote was found. |
+  ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); |
+  return false; |
+} |
+ |
+// Decodes one \uXXXX escape, or a surrogate pair of two such escapes, and |
+// appends the UTF-8 encoding of the resulting code point to |dest_string|. |
+// Entry is at the first X in \uXXXX; on success the cursor is left on the |
+// last hex digit consumed. Returns false on malformed input without |
+// reporting an error itself. |
+// NOTE(review): the result is appended as a C string, so \u0000 presumably |
+// appends nothing -- confirm whether that is intended. |
+bool JSONParser::DecodeUTF16(std::string* dest_string) { |
+  if (!CanConsume(4)) |
+    return false; |
+ |
+  // This is a 32-bit field because the shift operations in the |
+  // conversion process below cause MSVC to error about "data loss." |
+  // This only stores UTF-16 code units, though. |
+  // Consume the UTF-16 code unit, which may be a high surrogate. |
+  int code_unit16_high = 0; |
+  if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) |
+    return false; |
+ |
+  // Only add 3, not 4, because at the end of this iteration, the parser has |
+  // finished working with the last digit of the UTF sequence, meaning that |
+  // the next iteration will advance to the next byte. |
+  NextNChars(3); |
+ |
+  // Used to convert the UTF-16 code units to a code point and then to a UTF-8 |
+  // code unit sequence. |
+  char code_point[8] = { 0 }; |
+  size_t offset = 0; |
+ |
+  // If this is a high surrogate, consume the next code unit to get the |
+  // low surrogate. |
+  if (CBU16_IS_SURROGATE(code_unit16_high)) { |
+    // Make sure this is the high surrogate. If not, it's an encoding |
+    // error. |
+    if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) |
+      return false; |
+ |
+    // Make sure that the token has more characters to consume the |
+    // lower surrogate. The cursor is still on the last hex digit of the high |
+    // surrogate, so 7 bytes are needed: that digit, '\', 'u' and four HEX |
+    // digits. Checking for only 6 would let the fourth digit be read from |
+    // |end_pos_|. |
+    if (!CanConsume(7)) |
+      return false; |
+    if (*NextChar() != '\\' || *NextChar() != 'u') |
+      return false; |
+ |
+    NextChar();  // Read past 'u'. |
+    int code_unit16_low = 0; |
+    if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) |
+      return false; |
+ |
+    NextNChars(3); |
+ |
+    if (!CBU16_IS_TRAIL(code_unit16_low)) { |
+      return false; |
+    } |
+ |
+    uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high, |
+                                                 code_unit16_low); |
+    offset = 0; |
+    CBU8_APPEND_UNSAFE(code_point, offset, code_unit32); |
+  } else { |
+    // Not a surrogate. |
+    DCHECK(CBU16_IS_SINGLE(code_unit16_high)); |
+    CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high); |
+  } |
+ |
+  dest_string->append(code_point); |
+  return true; |
+} |
+ |
+// Appends the code point |point| to |dest|: as a single character when it is |
+// below kExtendedASCIIStart, otherwise as its multi-byte UTF-8 encoding, |
+// which converts |dest| to an owned string. |
+void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) { |
+  // Anything outside of the basic ASCII plane will need to be decomposed from |
+  // int32 to a multi-byte sequence. |
+  if (point < kExtendedASCIIStart) { |
+    dest->Append(point); |
+  } else { |
+    char utf8_units[4] = { 0 }; |
+    int offset = 0; |
+    CBU8_APPEND_UNSAFE(utf8_units, offset, point); |
+    dest->Convert(); |
+    // CBU8_APPEND_UNSAFE can write up to 4 bytes, so |utf8_units| may not be |
+    // zero terminated at this point. |offset| holds the number of bytes |
+    // written; build the string from an explicit length instead of scanning |
+    // for a terminator past the end of the array. |
+    dest->AppendString(std::string(utf8_units, offset)); |
+  } |
+} |
+ |
+// Parses the number starting under the cursor: an optional '-', an integer |
+// part, an optional fraction and an optional exponent. Returns an integer |
+// Value when the text converts with StringToInt, otherwise a double Value |
+// when it converts to a finite double, otherwise NULL. On success the cursor |
+// is left on the last character of the number. |
+Value* JSONParser::ConsumeNumber() { |
+  const char* num_start = pos_; |
+  const int start_index = index_; |
+  int end_index = start_index; |
+ |
+  if (*pos_ == '-') |
+    NextChar(); |
+ |
+  // Integer part: leading zeros are rejected here. |
+  if (!ReadInt(false)) { |
+    ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+    return NULL; |
+  } |
+  end_index = index_; |
+ |
+  // The optional fraction part. |
+  if (*pos_ == '.') { |
+    if (!CanConsume(1)) { |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+      return NULL; |
+    } |
+    NextChar(); |
+    if (!ReadInt(true)) { |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+      return NULL; |
+    } |
+    end_index = index_; |
+  } |
+ |
+  // Optional exponent part. |
+  if (*pos_ == 'e' || *pos_ == 'E') { |
+    NextChar(); |
+    if (*pos_ == '-' || *pos_ == '+') |
+      NextChar(); |
+    if (!ReadInt(true)) { |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+      return NULL; |
+    } |
+    end_index = index_; |
+  } |
+ |
+  // ReadInt is greedy because numbers have no easily detectable sentinel, |
+  // so save off where the parser should be on exit (see Consume invariant at |
+  // the top of the header), then make sure the next token is one which is |
+  // valid. |
+  const char* exit_pos = pos_ - 1; |
+  int exit_index = index_ - 1; |
+ |
+  switch (GetNextToken()) { |
+    case T_OBJECT_END: |
+    case T_ARRAY_END: |
+    case T_LIST_SEPARATOR: |
+    case T_END_OF_INPUT: |
+      break; |
+    default: |
+      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+      return NULL; |
+  } |
+ |
+  // Undo the lookahead done by GetNextToken(). Only the cursor is restored; |
+  // line bookkeeping updated while skipping whitespace is kept. |
+  pos_ = exit_pos; |
+  index_ = exit_index; |
+ |
+  StringPiece num_string(num_start, end_index - start_index); |
+ |
+  int num_int; |
+  if (StringToInt(num_string, &num_int)) |
+    return Value::CreateIntegerValue(num_int); |
+ |
+  double num_double; |
+  if (base::StringToDouble(num_string.as_string(), &num_double) && |
+      IsFinite(num_double)) { |
+    return Value::CreateDoubleValue(num_double); |
+  } |
+ |
+  // Neither conversion succeeded. Note that no error is reported on this |
+  // path. |
+  return NULL; |
+} |
+ |
+// Consumes a run of ASCII digits starting under the cursor and leaves the |
+// cursor on the first character after the run. Returns false if there is no |
+// digit at all, or if the run has more than one digit, starts with '0' and |
+// |allow_leading_zeros| is false. |
+bool JSONParser::ReadInt(bool allow_leading_zeros) { |
+  const char first = *pos_; |
+  int len = 0; |
+ |
+  for (char c = first; CanConsume(1) && IsAsciiDigit(c); c = *NextChar()) |
+    ++len; |
+ |
+  if (len == 0) |
+    return false; |
+ |
+  return allow_leading_zeros || len <= 1 || first != '0'; |
+} |
+ |
+// Parses one of the literals "true", "false" or "null" starting under the |
+// cursor and returns the matching Value, or reports an error and returns |
+// NULL. On success the cursor is left on the last character of the literal. |
+Value* JSONParser::ConsumeLiteral() { |
+  switch (*pos_) { |
+    case 't': { |
+      const char* kTrueLiteral = "true"; |
+      const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); |
+      // The comparison reads kTrueLen bytes starting at |pos_|, so that many |
+      // bytes must remain. Checking for one fewer would let it read the byte |
+      // at |end_pos_|. |
+      if (!CanConsume(kTrueLen) || |
+          !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { |
+        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+        return NULL; |
+      } |
+      // Advance only to the last character of the literal. |
+      NextNChars(kTrueLen - 1); |
+      return Value::CreateBooleanValue(true); |
+    } |
+    case 'f': { |
+      const char* kFalseLiteral = "false"; |
+      const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); |
+      // All kFalseLen compared bytes must lie inside the input. |
+      if (!CanConsume(kFalseLen) || |
+          !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { |
+        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+        return NULL; |
+      } |
+      NextNChars(kFalseLen - 1); |
+      return Value::CreateBooleanValue(false); |
+    } |
+    case 'n': { |
+      const char* kNullLiteral = "null"; |
+      const int kNullLen = static_cast<int>(strlen(kNullLiteral)); |
+      // All kNullLen compared bytes must lie inside the input. |
+      if (!CanConsume(kNullLen) || |
+          !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { |
+        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
+        return NULL; |
+      } |
+      NextNChars(kNullLen - 1); |
+      return Value::CreateNullValue(); |
+    } |
+    default: |
+      ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
+      return NULL; |
+  } |
+} |
+ |
+// static |
+// Returns true if the first |len| characters of |one| and |two| match, using |
+// strncmp semantics. |
+bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { |
+  const int difference = strncmp(one, two, len); |
+  return difference == 0; |
+} |
+ |
+// Records |code| together with the current line and a column computed from |
+// the cursor's offset within that line, shifted by |column_adjust|. |
+void JSONParser::ReportError(JSONReader::JsonParseError code, |
+                             int column_adjust) { |
+  const int column_in_line = index_ - index_last_line_; |
+  error_column_ = column_in_line + column_adjust; |
+  error_line_ = line_number_; |
+  error_code_ = code; |
+} |
+ |
+// static |
+// Prefixes |description| with the position when either |line| or |column| is |
+// non-zero; otherwise returns |description| unchanged. |
+std::string JSONParser::FormatErrorMessage(int line, int column, |
+                                           const std::string& description) { |
+  if (!line && !column) |
+    return description; |
+ |
+  return StringPrintf("Line: %i, column: %i, %s", |
+                      line, column, description.c_str()); |
+} |
+ |
+} // namespace internal |
+} // namespace base |