Chromium Code Reviews| Index: base/json/string_escape.cc |
| diff --git a/base/json/string_escape.cc b/base/json/string_escape.cc |
| index 10ea6707465c548a983696475caa4e5f50297e17..93b81fe32386cb2d2509439c1e6ef02f0b606f26 100644 |
| --- a/base/json/string_escape.cc |
| +++ b/base/json/string_escape.cc |
| @@ -8,40 +8,55 @@ |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| +#include "base/strings/utf_string_conversion_utils.h" |
| +#include "base/strings/utf_string_conversions.h" |
| +#include "base/third_party/icu/icu_utf.h" |
| namespace base { |
| namespace { |
| -// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, |
| -// returns true and appends the escape sequence to |dst|. This isn't required |
| -// by the spec, but it's more readable by humans than the \uXXXX alternatives. |
| -template<typename CHAR> |
| -static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { |
| +// Format string for printing a \uXXXX escape sequence. |
| +const char kU16EscapeFormat[] = "\\u%04X"; |
| + |
| +// The code point to output for an invalid input code unit. |
| +const uint32 kReplacementCodePoint = 0xFFFD; |
| + |
| +// Try to escape the |code_point| if it is a known special character. If |
| +// successful, returns true and appends the escape sequence to |dest|. This |
| +// isn't required by the spec, but it's more readable by humans. |
| +bool EscapeCodePoint(uint32 code_point, std::string* dest) { |
|
Mark Mentovai
2013/12/06 15:35:12
This name makes it sound like it can escape any co
jungshik at Google
2013/12/07 13:16:59
Agree. I had to come back up here to remind me wha
Robert Sesek
2013/12/09 19:52:09
Done.
|
| // WARNING: if you add a new case here, you need to update the reader as well. |
| // Note: \v is in the reader, but not here since the JSON spec doesn't |
| // allow it. |
| - switch (c) { |
| + switch (code_point) { |
| case '\b': |
| - dst->append("\\b"); |
| + dest->append("\\b"); |
| break; |
| case '\f': |
| - dst->append("\\f"); |
| + dest->append("\\f"); |
| break; |
| case '\n': |
| - dst->append("\\n"); |
| + dest->append("\\n"); |
| break; |
| case '\r': |
| - dst->append("\\r"); |
| + dest->append("\\r"); |
| break; |
| case '\t': |
| - dst->append("\\t"); |
| + dest->append("\\t"); |
| break; |
| case '\\': |
| - dst->append("\\\\"); |
| + dest->append("\\\\"); |
| break; |
| case '"': |
| - dst->append("\\\""); |
| + dest->append("\\\""); |
| + break; |
| + // Escape <, > to prevent script execution. |
|
Mark Mentovai
2013/12/06 15:35:12
You really only need < for this, you can save a co
Robert Sesek
2013/12/09 19:52:09
Done.
|
| + case '<': |
| + dest->append("\\u003C"); |
| + break; |
| + case '>': |
| + dest->append("\\u003E"); |
| break; |
| default: |
| return false; |
| @@ -49,57 +64,106 @@ static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { |
| return true; |
| } |
| -template <class STR> |
| -void JsonDoubleQuoteT(const STR& str, |
| +template <typename S> |
| +int32 StringLength(const S& str) { |
| + // Casting is necessary because ICU uses int32. Try and do so safely. |
| + CHECK_LE(str.length(), static_cast<size_t>(kint32max)); |
|
Mark Mentovai
2013/12/06 15:35:12
std::numeric_limits<int32>::max() instead of kint3
Robert Sesek
2013/12/09 19:52:09
There seems to be a preference for the constant wh
|
| + return static_cast<int32>(str.length()); |
| +} |
| + |
| +} // namespace |
| + |
| +void EscapeJSONString(const StringPiece& str, |
| bool put_in_quotes, |
| - std::string* dst) { |
| + std::string* dest) { |
| if (put_in_quotes) |
| - dst->push_back('"'); |
| - |
| - for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { |
| - typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; |
| - if (!JsonSingleEscapeChar(c, dst)) { |
| - if (c < 32 || c > 126 || c == '<' || c == '>') { |
| - // 1. Escaping <, > to prevent script execution. |
| - // 2. Technically, we could also pass through c > 126 as UTF8, but this |
| - // is also optional. It would also be a pain to implement here. |
| - unsigned int as_uint = static_cast<unsigned int>(c); |
| - base::StringAppendF(dst, "\\u%04X", as_uint); |
| + dest->push_back('"'); |
| + |
| + const int32 length = StringLength(str); |
| + for (int32 i = 0; i < length; ++i) { |
| + uint32 code_point; |
| + if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) |
| + code_point = kReplacementCodePoint; |
| + |
| + if (EscapeCodePoint(code_point, dest)) |
| + continue; |
| + |
| + // Escape non-printing characters. |
| + if (code_point < 32) |
| + base::StringAppendF(dest, kU16EscapeFormat, code_point); |
| + else |
| + WriteUnicodeCharacter(code_point, dest); |
| + } |
| + |
| + if (put_in_quotes) |
| + dest->push_back('"'); |
| +} |
| + |
| +void EscapeJSONString(const StringPiece16& str, |
| + bool put_in_quotes, |
| + std::string* dest) { |
| + if (put_in_quotes) |
| + dest->push_back('"'); |
| + |
| + const int32 length = StringLength(str); |
| + for (int32 i = 0; i < length; ++i) { |
| + uint32 code_point; |
| + if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) |
| + code_point = kReplacementCodePoint; |
|
jungshik at Google
2013/12/07 13:16:59
ReadUnicodeCharacter passes through non-character
Robert Sesek
2013/12/09 19:52:09
base::StringValue performs a DCHECK(IsStringUTF8()
|
| + |
| + if (EscapeCodePoint(code_point, dest)) |
| + continue; |
| + |
| + if (code_point < 32 || code_point > 126) { |
| + if (CBU16_LENGTH(code_point) == 1) { |
| + base::StringAppendF(dest, kU16EscapeFormat, code_point); |
| } else { |
| - unsigned char ascii = static_cast<unsigned char>(*it); |
| - dst->push_back(ascii); |
| + base::StringAppendF(dest, kU16EscapeFormat, CBU16_LEAD(code_point)); |
| + base::StringAppendF(dest, kU16EscapeFormat, CBU16_TRAIL(code_point)); |
| } |
| + } else { |
| + dest->push_back(static_cast<char>(code_point)); |
| } |
| } |
| if (put_in_quotes) |
| - dst->push_back('"'); |
| + dest->push_back('"'); |
| } |
| -} // namespace |
| - |
| -void JsonDoubleQuote(const StringPiece& str, |
| - bool put_in_quotes, |
| - std::string* dst) { |
| - JsonDoubleQuoteT(str, put_in_quotes, dst); |
| +std::string GetQuotedJSONString(const StringPiece& str) { |
| + std::string dest; |
| + EscapeJSONString(str, true, &dest); |
| + return dest; |
| } |
| -std::string GetDoubleQuotedJson(const StringPiece& str) { |
| - std::string dst; |
| - JsonDoubleQuote(str, true, &dst); |
| - return dst; |
| +std::string GetQuotedJSONString(const StringPiece16& str) { |
| + std::string dest; |
| + EscapeJSONString(str, true, &dest); |
| + return dest; |
| } |
| -void JsonDoubleQuote(const StringPiece16& str, |
| - bool put_in_quotes, |
| - std::string* dst) { |
| - JsonDoubleQuoteT(str, put_in_quotes, dst); |
| -} |
| +std::string EscapeBytesAsInvalidJSONString(const StringPiece& str, |
| + bool put_in_quotes) { |
| + std::string dest; |
| + |
| + if (put_in_quotes) |
| + dest.push_back('"'); |
| + |
| + for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) { |
| + ToUnsigned<StringPiece::value_type>::Unsigned c = *it; |
| + if (EscapeCodePoint(c, &dest)) |
| + continue; |
| + |
| + if (c < 32 || c > 126) |
| + base::StringAppendF(&dest, kU16EscapeFormat, c); |
| + else |
| + dest.push_back(*it); |
| + } |
| + |
| + if (put_in_quotes) |
| + dest.push_back('"'); |
| -std::string GetDoubleQuotedJson(const StringPiece16& str) { |
| - std::string dst; |
| - JsonDoubleQuote(str, true, &dst); |
| - return dst; |
| + return dest; |
| } |
| } // namespace base |