Index: base/json_reader.cc |
=================================================================== |
--- base/json_reader.cc (revision 29830) |
+++ base/json_reader.cc (working copy) |
@@ -1,638 +0,0 @@ |
-// Copyright (c) 2009 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "base/json_reader.h" |
- |
-#include "base/float_util.h" |
-#include "base/logging.h" |
-#include "base/scoped_ptr.h" |
-#include "base/string_util.h" |
-#include "base/utf_string_conversions.h" |
-#include "base/values.h" |
- |
-static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN, |
- 0, 0); |
-static const int kStackLimit = 100; |
- |
-namespace { |
- |
-inline int HexToInt(wchar_t c) { |
- if ('0' <= c && c <= '9') { |
- return c - '0'; |
- } else if ('A' <= c && c <= 'F') { |
- return c - 'A' + 10; |
- } else if ('a' <= c && c <= 'f') { |
- return c - 'a' + 10; |
- } |
- NOTREACHED(); |
- return 0; |
-} |
- |
-// A helper method for ParseNumberToken. It reads an int from the end of |
-// token. The method returns false if there is no valid integer at the end of |
-// the token. |
-bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) { |
- wchar_t first = token.NextChar(); |
- int len = 0; |
- |
- // Read in more digits |
- wchar_t c = first; |
- while ('\0' != c && '0' <= c && c <= '9') { |
- ++token.length; |
- ++len; |
- c = token.NextChar(); |
- } |
- // We need at least 1 digit. |
- if (len == 0) |
- return false; |
- |
- if (!can_have_leading_zeros && len > 1 && '0' == first) |
- return false; |
- |
- return true; |
-} |
- |
-// A helper method for ParseStringToken. It reads |digits| hex digits from the |
-// token. If the sequence if digits is not valid (contains other characters), |
-// the method returns false. |
-bool ReadHexDigits(JSONReader::Token& token, int digits) { |
- for (int i = 1; i <= digits; ++i) { |
- wchar_t c = *(token.begin + token.length + i); |
- if ('\0' == c) |
- return false; |
- if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || |
- ('A' <= c && c <= 'F'))) { |
- return false; |
- } |
- } |
- |
- token.length += digits; |
- return true; |
-} |
- |
-} // anonymous namespace |
- |
-const char* JSONReader::kBadRootElementType = |
- "Root value must be an array or object."; |
-const char* JSONReader::kInvalidEscape = |
- "Invalid escape sequence."; |
-const char* JSONReader::kSyntaxError = |
- "Syntax error."; |
-const char* JSONReader::kTrailingComma = |
- "Trailing comma not allowed."; |
-const char* JSONReader::kTooMuchNesting = |
- "Too much nesting."; |
-const char* JSONReader::kUnexpectedDataAfterRoot = |
- "Unexpected data after root element."; |
-const char* JSONReader::kUnsupportedEncoding = |
- "Unsupported encoding. JSON must be UTF-8."; |
-const char* JSONReader::kUnquotedDictionaryKey = |
- "Dictionary keys must be quoted."; |
- |
-/* static */ |
-Value* JSONReader::Read(const std::string& json, |
- bool allow_trailing_comma) { |
- return ReadAndReturnError(json, allow_trailing_comma, NULL); |
-} |
- |
-/* static */ |
-Value* JSONReader::ReadAndReturnError(const std::string& json, |
- bool allow_trailing_comma, |
- std::string *error_message_out) { |
- JSONReader reader = JSONReader(); |
- Value* root = reader.JsonToValue(json, true, allow_trailing_comma); |
- if (root) |
- return root; |
- |
- if (error_message_out) |
- *error_message_out = reader.error_message(); |
- |
- return NULL; |
-} |
- |
-/* static */ |
-std::string JSONReader::FormatErrorMessage(int line, int column, |
- const char* description) { |
- return StringPrintf("Line: %i, column: %i, %s", |
- line, column, description); |
-} |
- |
-JSONReader::JSONReader() |
- : start_pos_(NULL), json_pos_(NULL), stack_depth_(0), |
- allow_trailing_comma_(false) {} |
- |
-Value* JSONReader::JsonToValue(const std::string& json, bool check_root, |
- bool allow_trailing_comma) { |
- // The input must be in UTF-8. |
- if (!IsStringUTF8(json.c_str())) { |
- error_message_ = kUnsupportedEncoding; |
- return NULL; |
- } |
- |
- // The conversion from UTF8 to wstring removes null bytes for us |
- // (a good thing). |
- std::wstring json_wide(UTF8ToWide(json)); |
- start_pos_ = json_wide.c_str(); |
- |
- // When the input JSON string starts with a UTF-8 Byte-Order-Mark |
- // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode |
- // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from |
- // mis-treating a Unicode BOM as an invalid character and returning NULL, |
- // skip a converted Unicode BOM if it exists. |
- if (!json_wide.empty() && start_pos_[0] == 0xFEFF) { |
- ++start_pos_; |
- } |
- |
- json_pos_ = start_pos_; |
- allow_trailing_comma_ = allow_trailing_comma; |
- stack_depth_ = 0; |
- error_message_.clear(); |
- |
- scoped_ptr<Value> root(BuildValue(check_root)); |
- if (root.get()) { |
- if (ParseToken().type == Token::END_OF_INPUT) { |
- return root.release(); |
- } else { |
- SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_); |
- } |
- } |
- |
- // Default to calling errors "syntax errors". |
- if (error_message_.empty()) |
- SetErrorMessage(kSyntaxError, json_pos_); |
- |
- return NULL; |
-} |
- |
-Value* JSONReader::BuildValue(bool is_root) { |
- ++stack_depth_; |
- if (stack_depth_ > kStackLimit) { |
- SetErrorMessage(kTooMuchNesting, json_pos_); |
- return NULL; |
- } |
- |
- Token token = ParseToken(); |
- // The root token must be an array or an object. |
- if (is_root && token.type != Token::OBJECT_BEGIN && |
- token.type != Token::ARRAY_BEGIN) { |
- SetErrorMessage(kBadRootElementType, json_pos_); |
- return NULL; |
- } |
- |
- scoped_ptr<Value> node; |
- |
- switch (token.type) { |
- case Token::END_OF_INPUT: |
- case Token::INVALID_TOKEN: |
- return NULL; |
- |
- case Token::NULL_TOKEN: |
- node.reset(Value::CreateNullValue()); |
- break; |
- |
- case Token::BOOL_TRUE: |
- node.reset(Value::CreateBooleanValue(true)); |
- break; |
- |
- case Token::BOOL_FALSE: |
- node.reset(Value::CreateBooleanValue(false)); |
- break; |
- |
- case Token::NUMBER: |
- node.reset(DecodeNumber(token)); |
- if (!node.get()) |
- return NULL; |
- break; |
- |
- case Token::STRING: |
- node.reset(DecodeString(token)); |
- if (!node.get()) |
- return NULL; |
- break; |
- |
- case Token::ARRAY_BEGIN: |
- { |
- json_pos_ += token.length; |
- token = ParseToken(); |
- |
- node.reset(new ListValue()); |
- while (token.type != Token::ARRAY_END) { |
- Value* array_node = BuildValue(false); |
- if (!array_node) |
- return NULL; |
- static_cast<ListValue*>(node.get())->Append(array_node); |
- |
- // After a list value, we expect a comma or the end of the list. |
- token = ParseToken(); |
- if (token.type == Token::LIST_SEPARATOR) { |
- json_pos_ += token.length; |
- token = ParseToken(); |
- // Trailing commas are invalid according to the JSON RFC, but some |
- // consumers need the parsing leniency, so handle accordingly. |
- if (token.type == Token::ARRAY_END) { |
- if (!allow_trailing_comma_) { |
- SetErrorMessage(kTrailingComma, json_pos_); |
- return NULL; |
- } |
- // Trailing comma OK, stop parsing the Array. |
- break; |
- } |
- } else if (token.type != Token::ARRAY_END) { |
- // Unexpected value after list value. Bail out. |
- return NULL; |
- } |
- } |
- if (token.type != Token::ARRAY_END) { |
- return NULL; |
- } |
- break; |
- } |
- |
- case Token::OBJECT_BEGIN: |
- { |
- json_pos_ += token.length; |
- token = ParseToken(); |
- |
- node.reset(new DictionaryValue); |
- while (token.type != Token::OBJECT_END) { |
- if (token.type != Token::STRING) { |
- SetErrorMessage(kUnquotedDictionaryKey, json_pos_); |
- return NULL; |
- } |
- scoped_ptr<Value> dict_key_value(DecodeString(token)); |
- if (!dict_key_value.get()) |
- return NULL; |
- |
- // Convert the key into a wstring. |
- std::wstring dict_key; |
- bool success = dict_key_value->GetAsString(&dict_key); |
- DCHECK(success); |
- |
- json_pos_ += token.length; |
- token = ParseToken(); |
- if (token.type != Token::OBJECT_PAIR_SEPARATOR) |
- return NULL; |
- |
- json_pos_ += token.length; |
- token = ParseToken(); |
- Value* dict_value = BuildValue(false); |
- if (!dict_value) |
- return NULL; |
- static_cast<DictionaryValue*>(node.get())->Set(dict_key, dict_value); |
- |
- // After a key/value pair, we expect a comma or the end of the |
- // object. |
- token = ParseToken(); |
- if (token.type == Token::LIST_SEPARATOR) { |
- json_pos_ += token.length; |
- token = ParseToken(); |
- // Trailing commas are invalid according to the JSON RFC, but some |
- // consumers need the parsing leniency, so handle accordingly. |
- if (token.type == Token::OBJECT_END) { |
- if (!allow_trailing_comma_) { |
- SetErrorMessage(kTrailingComma, json_pos_); |
- return NULL; |
- } |
- // Trailing comma OK, stop parsing the Object. |
- break; |
- } |
- } else if (token.type != Token::OBJECT_END) { |
- // Unexpected value after last object value. Bail out. |
- return NULL; |
- } |
- } |
- if (token.type != Token::OBJECT_END) |
- return NULL; |
- |
- break; |
- } |
- |
- default: |
- // We got a token that's not a value. |
- return NULL; |
- } |
- json_pos_ += token.length; |
- |
- --stack_depth_; |
- return node.release(); |
-} |
- |
-JSONReader::Token JSONReader::ParseNumberToken() { |
- // We just grab the number here. We validate the size in DecodeNumber. |
- // According to RFC4627, a valid number is: [minus] int [frac] [exp] |
- Token token(Token::NUMBER, json_pos_, 0); |
- wchar_t c = *json_pos_; |
- if ('-' == c) { |
- ++token.length; |
- c = token.NextChar(); |
- } |
- |
- if (!ReadInt(token, false)) |
- return kInvalidToken; |
- |
- // Optional fraction part |
- c = token.NextChar(); |
- if ('.' == c) { |
- ++token.length; |
- if (!ReadInt(token, true)) |
- return kInvalidToken; |
- c = token.NextChar(); |
- } |
- |
- // Optional exponent part |
- if ('e' == c || 'E' == c) { |
- ++token.length; |
- c = token.NextChar(); |
- if ('-' == c || '+' == c) { |
- ++token.length; |
- c = token.NextChar(); |
- } |
- if (!ReadInt(token, true)) |
- return kInvalidToken; |
- } |
- |
- return token; |
-} |
- |
-Value* JSONReader::DecodeNumber(const Token& token) { |
- const std::wstring num_string(token.begin, token.length); |
- |
- int num_int; |
- if (StringToInt(WideToUTF16Hack(num_string), &num_int)) |
- return Value::CreateIntegerValue(num_int); |
- |
- double num_double; |
- if (StringToDouble(WideToUTF16Hack(num_string), &num_double) && |
- base::IsFinite(num_double)) |
- return Value::CreateRealValue(num_double); |
- |
- return NULL; |
-} |
- |
-JSONReader::Token JSONReader::ParseStringToken() { |
- Token token(Token::STRING, json_pos_, 1); |
- wchar_t c = token.NextChar(); |
- while ('\0' != c) { |
- if ('\\' == c) { |
- ++token.length; |
- c = token.NextChar(); |
- // Make sure the escaped char is valid. |
- switch (c) { |
- case 'x': |
- if (!ReadHexDigits(token, 2)) { |
- SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
- return kInvalidToken; |
- } |
- break; |
- case 'u': |
- if (!ReadHexDigits(token, 4)) { |
- SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
- return kInvalidToken; |
- } |
- break; |
- case '\\': |
- case '/': |
- case 'b': |
- case 'f': |
- case 'n': |
- case 'r': |
- case 't': |
- case 'v': |
- case '"': |
- break; |
- default: |
- SetErrorMessage(kInvalidEscape, json_pos_ + token.length); |
- return kInvalidToken; |
- } |
- } else if ('"' == c) { |
- ++token.length; |
- return token; |
- } |
- ++token.length; |
- c = token.NextChar(); |
- } |
- return kInvalidToken; |
-} |
- |
-Value* JSONReader::DecodeString(const Token& token) { |
- std::wstring decoded_str; |
- decoded_str.reserve(token.length - 2); |
- |
- for (int i = 1; i < token.length - 1; ++i) { |
- wchar_t c = *(token.begin + i); |
- if ('\\' == c) { |
- ++i; |
- c = *(token.begin + i); |
- switch (c) { |
- case '"': |
- case '/': |
- case '\\': |
- decoded_str.push_back(c); |
- break; |
- case 'b': |
- decoded_str.push_back('\b'); |
- break; |
- case 'f': |
- decoded_str.push_back('\f'); |
- break; |
- case 'n': |
- decoded_str.push_back('\n'); |
- break; |
- case 'r': |
- decoded_str.push_back('\r'); |
- break; |
- case 't': |
- decoded_str.push_back('\t'); |
- break; |
- case 'v': |
- decoded_str.push_back('\v'); |
- break; |
- |
- case 'x': |
- decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) + |
- HexToInt(*(token.begin + i + 2))); |
- i += 2; |
- break; |
- case 'u': |
- decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) + |
- (HexToInt(*(token.begin + i + 2)) << 8) + |
- (HexToInt(*(token.begin + i + 3)) << 4) + |
- HexToInt(*(token.begin + i + 4))); |
- i += 4; |
- break; |
- |
- default: |
- // We should only have valid strings at this point. If not, |
- // ParseStringToken didn't do it's job. |
- NOTREACHED(); |
- return NULL; |
- } |
- } else { |
- // Not escaped |
- decoded_str.push_back(c); |
- } |
- } |
- return Value::CreateStringValue(decoded_str); |
-} |
- |
-JSONReader::Token JSONReader::ParseToken() { |
- static const std::wstring kNullString(L"null"); |
- static const std::wstring kTrueString(L"true"); |
- static const std::wstring kFalseString(L"false"); |
- |
- EatWhitespaceAndComments(); |
- |
- Token token(Token::INVALID_TOKEN, 0, 0); |
- switch (*json_pos_) { |
- case '\0': |
- token.type = Token::END_OF_INPUT; |
- break; |
- |
- case 'n': |
- if (NextStringMatch(kNullString)) |
- token = Token(Token::NULL_TOKEN, json_pos_, 4); |
- break; |
- |
- case 't': |
- if (NextStringMatch(kTrueString)) |
- token = Token(Token::BOOL_TRUE, json_pos_, 4); |
- break; |
- |
- case 'f': |
- if (NextStringMatch(kFalseString)) |
- token = Token(Token::BOOL_FALSE, json_pos_, 5); |
- break; |
- |
- case '[': |
- token = Token(Token::ARRAY_BEGIN, json_pos_, 1); |
- break; |
- |
- case ']': |
- token = Token(Token::ARRAY_END, json_pos_, 1); |
- break; |
- |
- case ',': |
- token = Token(Token::LIST_SEPARATOR, json_pos_, 1); |
- break; |
- |
- case '{': |
- token = Token(Token::OBJECT_BEGIN, json_pos_, 1); |
- break; |
- |
- case '}': |
- token = Token(Token::OBJECT_END, json_pos_, 1); |
- break; |
- |
- case ':': |
- token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1); |
- break; |
- |
- case '0': |
- case '1': |
- case '2': |
- case '3': |
- case '4': |
- case '5': |
- case '6': |
- case '7': |
- case '8': |
- case '9': |
- case '-': |
- token = ParseNumberToken(); |
- break; |
- |
- case '"': |
- token = ParseStringToken(); |
- break; |
- } |
- return token; |
-} |
- |
-bool JSONReader::NextStringMatch(const std::wstring& str) { |
- for (size_t i = 0; i < str.length(); ++i) { |
- if ('\0' == *json_pos_) |
- return false; |
- if (*(json_pos_ + i) != str[i]) |
- return false; |
- } |
- return true; |
-} |
- |
-void JSONReader::EatWhitespaceAndComments() { |
- while ('\0' != *json_pos_) { |
- switch (*json_pos_) { |
- case ' ': |
- case '\n': |
- case '\r': |
- case '\t': |
- ++json_pos_; |
- break; |
- case '/': |
- // TODO(tc): This isn't in the RFC so it should be a parser flag. |
- if (!EatComment()) |
- return; |
- break; |
- default: |
- // Not a whitespace char, just exit. |
- return; |
- } |
- } |
-} |
- |
-bool JSONReader::EatComment() { |
- if ('/' != *json_pos_) |
- return false; |
- |
- wchar_t next_char = *(json_pos_ + 1); |
- if ('/' == next_char) { |
- // Line comment, read until \n or \r |
- json_pos_ += 2; |
- while ('\0' != *json_pos_) { |
- switch (*json_pos_) { |
- case '\n': |
- case '\r': |
- ++json_pos_; |
- return true; |
- default: |
- ++json_pos_; |
- } |
- } |
- } else if ('*' == next_char) { |
- // Block comment, read until */ |
- json_pos_ += 2; |
- while ('\0' != *json_pos_) { |
- if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) { |
- json_pos_ += 2; |
- return true; |
- } |
- ++json_pos_; |
- } |
- } else { |
- return false; |
- } |
- return true; |
-} |
- |
-void JSONReader::SetErrorMessage(const char* description, |
- const wchar_t* error_pos) { |
- int line_number = 1; |
- int column_number = 1; |
- |
- // Figure out the line and column the error occured at. |
- for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) { |
- if (*pos == '\0') { |
- NOTREACHED(); |
- return; |
- } |
- |
- if (*pos == '\n') { |
- ++line_number; |
- column_number = 1; |
- } else { |
- ++column_number; |
- } |
- } |
- |
- error_message_ = FormatErrorMessage(line_number, column_number, description); |
-} |