Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3308)

Unified Diff: base/json/json_reader.cc

Issue 9801007: Improve JSONReader performance by up to 55% by using std::string instead of wstring. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Safety for \x Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/json/json_reader.h ('k') | base/json/json_reader_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/json/json_reader.cc
diff --git a/base/json/json_reader.cc b/base/json/json_reader.cc
index bbaf5fb349bcf396d9fa26f6ba0269faac0d9315..67b35e7b99d9b3f32c343dd2673179f8e9a515cf 100644
--- a/base/json/json_reader.cc
+++ b/base/json/json_reader.cc
@@ -9,15 +9,17 @@
#include "base/memory/scoped_ptr.h"
#include "base/stringprintf.h"
#include "base/string_number_conversions.h"
+#include "base/string_piece.h"
#include "base/string_util.h"
+#include "base/third_party/icu/icu_utf.h"
#include "base/utf_string_conversions.h"
#include "base/values.h"
namespace {
-const wchar_t kNullString[] = L"null";
-const wchar_t kTrueString[] = L"true";
-const wchar_t kFalseString[] = L"false";
+const char kNullString[] = "null";
+const char kTrueString[] = "true";
+const char kFalseString[] = "false";
const int kStackLimit = 100;
@@ -25,11 +27,11 @@ const int kStackLimit = 100;
// token. The method returns false if there is no valid integer at the end of
// the token.
bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) {
- wchar_t first = token.NextChar();
+ char first = token.NextChar();
int len = 0;
// Read in more digits.
- wchar_t c = first;
+ char c = first;
while ('\0' != c && IsAsciiDigit(c)) {
++token.length;
++len;
@@ -50,7 +52,7 @@ bool ReadInt(base::JSONReader::Token& token, bool can_have_leading_zeros) {
// the method returns false.
bool ReadHexDigits(base::JSONReader::Token& token, int digits) {
for (int i = 1; i <= digits; ++i) {
- wchar_t c = *(token.begin + token.length + i);
+ char c = *(token.begin + token.length + i);
if (c == '\0' || !IsHexDigit(c))
return false;
}
@@ -83,6 +85,7 @@ const char* JSONReader::kUnquotedDictionaryKey =
JSONReader::JSONReader()
: start_pos_(NULL),
json_pos_(NULL),
+ end_pos_(NULL),
stack_depth_(0),
allow_trailing_comma_(false),
error_code_(JSON_NO_ERROR),
@@ -148,23 +151,21 @@ std::string JSONReader::GetErrorMessage() const {
Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
bool allow_trailing_comma) {
// The input must be in UTF-8.
- if (!IsStringUTF8(json.c_str())) {
+ if (!IsStringUTF8(json.data())) {
error_code_ = JSON_UNSUPPORTED_ENCODING;
return NULL;
}
- // The conversion from UTF8 to wstring removes null bytes for us
- // (a good thing).
- std::wstring json_wide(UTF8ToWide(json));
- start_pos_ = json_wide.c_str();
-
- // When the input JSON string starts with a UTF-8 Byte-Order-Mark
- // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode
- // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from
- // mis-treating a Unicode BOM as an invalid character and returning NULL,
- // skip a converted Unicode BOM if it exists.
- if (!json_wide.empty() && start_pos_[0] == 0xFEFF) {
- ++start_pos_;
+ start_pos_ = json.data();
+ end_pos_ = start_pos_ + json.size();
+
+ // When the input JSON string starts with a UTF-8 Byte-Order-Mark (U+FEFF,
+ // encoded as <0xEF 0xBB 0xBF>), advance the start position to prevent the
+ // JSONReader::BuildValue() function from mis-treating a Unicode BOM as an
+ // invalid character and returning NULL.
+ if (json.size() >= 3 && start_pos_[0] == 0xEF &&
+ start_pos_[1] == 0xBB && start_pos_[2] == 0xBF) {
+ start_pos_ += 3;
}
json_pos_ = start_pos_;
@@ -356,7 +357,7 @@ JSONReader::Token JSONReader::ParseNumberToken() {
// We just grab the number here. We validate the size in DecodeNumber.
// According to RFC4627, a valid number is: [minus] int [frac] [exp]
Token token(Token::NUMBER, json_pos_, 0);
- wchar_t c = *json_pos_;
+ char c = *json_pos_;
if ('-' == c) {
++token.length;
c = token.NextChar();
@@ -390,15 +391,14 @@ JSONReader::Token JSONReader::ParseNumberToken() {
}
Value* JSONReader::DecodeNumber(const Token& token) {
- const std::wstring num_string(token.begin, token.length);
+ const std::string num_string(token.begin, token.length);
int num_int;
- if (StringToInt(WideToUTF8(num_string), &num_int))
+ if (StringToInt(num_string, &num_int))
return Value::CreateIntegerValue(num_int);
double num_double;
- if (StringToDouble(WideToUTF8(num_string), &num_double) &&
- base::IsFinite(num_double))
+ if (StringToDouble(num_string, &num_double) && base::IsFinite(num_double))
return Value::CreateDoubleValue(num_double);
return NULL;
@@ -406,8 +406,8 @@ Value* JSONReader::DecodeNumber(const Token& token) {
JSONReader::Token JSONReader::ParseStringToken() {
Token token(Token::STRING, json_pos_, 1);
- wchar_t c = token.NextChar();
- while ('\0' != c) {
+ char c = token.NextChar();
+ while (json_pos_ + token.length < end_pos_) {
if ('\\' == c) {
++token.length;
c = token.NextChar();
@@ -450,11 +450,11 @@ JSONReader::Token JSONReader::ParseStringToken() {
}
Value* JSONReader::DecodeString(const Token& token) {
- std::wstring decoded_str;
+ std::string decoded_str;
decoded_str.reserve(token.length - 2);
for (int i = 1; i < token.length - 1; ++i) {
- wchar_t c = *(token.begin + i);
+ char c = *(token.begin + i);
if ('\\' == c) {
++i;
c = *(token.begin + i);
@@ -483,17 +483,19 @@ Value* JSONReader::DecodeString(const Token& token) {
decoded_str.push_back('\v');
break;
- case 'x':
- decoded_str.push_back((HexDigitToInt(*(token.begin + i + 1)) << 4) +
- HexDigitToInt(*(token.begin + i + 2)));
+ case 'x': {
+ if (i + 2 >= token.length)
+ return NULL;
+ int hex_digit = 0;
+ if (!HexStringToInt(StringPiece(token.begin + i + 1, 2), &hex_digit))
+ return NULL;
+ decoded_str.push_back(hex_digit);
i += 2;
break;
+ }
case 'u':
- decoded_str.push_back((HexDigitToInt(*(token.begin + i + 1)) << 12 ) +
- (HexDigitToInt(*(token.begin + i + 2)) << 8) +
- (HexDigitToInt(*(token.begin + i + 3)) << 4) +
- HexDigitToInt(*(token.begin + i + 4)));
- i += 4;
+ if (!ConvertUTF16Units(token, &i, &decoded_str))
+ return NULL;
break;
default:
@@ -507,7 +509,66 @@ Value* JSONReader::DecodeString(const Token& token) {
decoded_str.push_back(c);
}
}
- return Value::CreateStringValue(WideToUTF16Hack(decoded_str));
+ return Value::CreateStringValue(decoded_str);
+}
+
+// Decodes the \uXXXX escape whose 'u' is at index |*i| of |token| and
+// appends the resulting code point to |dest_string| as UTF-8. If the escape
+// is a high (lead) surrogate, it must be followed immediately by a second
+// \uXXXX escape holding the low (trail) surrogate; the pair is combined into
+// a single supplementary code point.
+//
+// On success, |*i| is advanced to the last hex digit consumed and true is
+// returned. Returns false, appending nothing, if |token| is too short, the
+// hex digits do not parse, or the surrogates are unpaired or out of order.
+bool JSONReader::ConvertUTF16Units(const Token& token,
+                                   int* i,
+                                   std::string* dest_string) {
+  // The four hex digits occupy indices |*i| + 1 through |*i| + 4.
+  if (*i + 4 >= token.length)
+    return false;
+
+  // This is a 32-bit field because the shift operations in the
+  // conversion process below cause MSVC to error about "data loss."
+  // This only stores UTF-16 code units, though.
+  // Consume the UTF-16 code unit, which may be a high surrogate.
+  int code_unit16_high = 0;
+  if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_high))
+    return false;
+  *i += 4;
+
+  // If this is a high surrogate, consume the next code unit to get the
+  // low surrogate.
+  int code_unit16_low = 0;
+  if (CBU16_IS_SURROGATE(code_unit16_high)) {
+    // Make sure this is the high surrogate. If not, it's an encoding
+    // error.
+    if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
+      return false;
+
+    // Make sure that the token has more characters to consume the
+    // lower surrogate: a backslash, a 'u', and four hex digits.
+    if (*i + 6 >= token.length)
+      return false;
+    if (*(token.begin + *i + 1) != '\\' || *(token.begin + *i + 2) != 'u')
+      return false;
+    *i += 2;
+
+    if (!HexStringToInt(StringPiece(token.begin + *i + 1, 4), &code_unit16_low))
+      return false;
+    *i += 4;
+    if (!CBU16_IS_SURROGATE(code_unit16_low) ||
+        !CBU16_IS_TRAIL(code_unit16_low)) {
+      return false;
+    }
+  } else if (!CBU16_IS_SINGLE(code_unit16_high)) {
+    // If this is not a code point, it's an encoding error.
+    return false;
+  }
+
+  // Convert the UTF-16 code units to a code point and then to a UTF-8
+  // code unit sequence. |offset| counts the bytes written to |code_point|.
+  char code_point[8] = { 0 };
+  size_t offset = 0;
+  if (!code_unit16_low) {
+    CBU8_APPEND_UNSAFE(code_point, offset, code_unit16_high);
+  } else {
+    uint32 code_unit32 = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
+                                                 code_unit16_low);
+    CBU8_APPEND_UNSAFE(code_point, offset, code_unit32);
+  }
+
+  // Append exactly the |offset| bytes written. Treating |code_point| as a
+  // C string would silently drop U+0000, whose UTF-8 encoding is a NUL byte.
+  dest_string->append(code_point, offset);
+  return true;
 }
JSONReader::Token JSONReader::ParseToken() {
@@ -580,7 +641,7 @@ JSONReader::Token JSONReader::ParseToken() {
}
void JSONReader::EatWhitespaceAndComments() {
- while ('\0' != *json_pos_) {
+ while (json_pos_ != end_pos_) {
switch (*json_pos_) {
case ' ':
case '\n':
@@ -604,11 +665,11 @@ bool JSONReader::EatComment() {
if ('/' != *json_pos_)
return false;
- wchar_t next_char = *(json_pos_ + 1);
+ char next_char = *(json_pos_ + 1);
if ('/' == next_char) {
// Line comment, read until \n or \r
json_pos_ += 2;
- while ('\0' != *json_pos_) {
+ while (json_pos_ != end_pos_) {
switch (*json_pos_) {
case '\n':
case '\r':
@@ -621,7 +682,7 @@ bool JSONReader::EatComment() {
} else if ('*' == next_char) {
// Block comment, read until */
json_pos_ += 2;
- while ('\0' != *json_pos_) {
+ while (json_pos_ != end_pos_) {
if ('*' == *json_pos_ && '/' == *(json_pos_ + 1)) {
json_pos_ += 2;
return true;
@@ -634,18 +695,18 @@ bool JSONReader::EatComment() {
return true;
}
-bool JSONReader::NextStringMatch(const wchar_t* str, size_t length) {
- return wcsncmp(json_pos_, str, length) == 0;
+bool JSONReader::NextStringMatch(const char* str, size_t length) {
+ return strncmp(json_pos_, str, length) == 0;
}
void JSONReader::SetErrorCode(JsonParseError error,
- const wchar_t* error_pos) {
+ const char* error_pos) {
int line_number = 1;
int column_number = 1;
// Figure out the line and column the error occurred at.
- for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) {
- if (*pos == '\0') {
+ for (const char* pos = start_pos_; pos != error_pos; ++pos) {
+ if (pos > end_pos_) {
NOTREACHED();
return;
}
« no previous file with comments | « base/json/json_reader.h ('k') | base/json/json_reader_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698