Index: base/json/string_escape.cc |
diff --git a/base/json/string_escape.cc b/base/json/string_escape.cc |
index 10ea6707465c548a983696475caa4e5f50297e17..93b81fe32386cb2d2509439c1e6ef02f0b606f26 100644 |
--- a/base/json/string_escape.cc |
+++ b/base/json/string_escape.cc |
@@ -8,40 +8,55 @@ |
#include "base/strings/string_util.h" |
#include "base/strings/stringprintf.h" |
+#include "base/strings/utf_string_conversion_utils.h" |
+#include "base/strings/utf_string_conversions.h" |
+#include "base/third_party/icu/icu_utf.h" |
namespace base { |
namespace { |
-// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, |
-// returns true and appends the escape sequence to |dst|. This isn't required |
-// by the spec, but it's more readable by humans than the \uXXXX alternatives. |
-template<typename CHAR> |
-static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { |
+// Format string for printing a \uXXXX escape sequence. |
+const char kU16EscapeFormat[] = "\\u%04X"; |
+ |
+// The code point to output for an invalid input code unit. |
+const uint32 kReplacementCodePoint = 0xFFFD; |
+ |
+// Try to escape the |code_point| if it is a known special character. If |
+// successful, returns true and appends the escape sequence to |dest|. This |
+// isn't required by the spec, but it's more readable by humans. |
+bool EscapeCodePoint(uint32 code_point, std::string* dest) { |
Mark Mentovai
2013/12/06 15:35:12
This name makes it sound like it can escape any co
jungshik at Google
2013/12/07 13:16:59
Agree. I had to come back up here to remind me wha
Robert Sesek
2013/12/09 19:52:09
Done.
|
// WARNING: if you add a new case here, you need to update the reader as well. |
// Note: \v is in the reader, but not here since the JSON spec doesn't |
// allow it. |
- switch (c) { |
+ switch (code_point) { |
case '\b': |
- dst->append("\\b"); |
+ dest->append("\\b"); |
break; |
case '\f': |
- dst->append("\\f"); |
+ dest->append("\\f"); |
break; |
case '\n': |
- dst->append("\\n"); |
+ dest->append("\\n"); |
break; |
case '\r': |
- dst->append("\\r"); |
+ dest->append("\\r"); |
break; |
case '\t': |
- dst->append("\\t"); |
+ dest->append("\\t"); |
break; |
case '\\': |
- dst->append("\\\\"); |
+ dest->append("\\\\"); |
break; |
case '"': |
- dst->append("\\\""); |
+ dest->append("\\\""); |
+ break; |
+ // Escape <, > to prevent script execution. |
Mark Mentovai
2013/12/06 15:35:12
You really only need < for this, you can save a co
Robert Sesek
2013/12/09 19:52:09
Done.
|
+ case '<': |
+ dest->append("\\u003C"); |
+ break; |
+ case '>': |
+ dest->append("\\u003E"); |
break; |
default: |
return false; |
@@ -49,57 +64,106 @@ static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { |
return true; |
} |
-template <class STR> |
-void JsonDoubleQuoteT(const STR& str, |
+template <typename S> |
+int32 StringLength(const S& str) { |
+ // Casting is necessary because ICU uses int32. Try and do so safely. |
+ CHECK_LE(str.length(), static_cast<size_t>(kint32max)); |
Mark Mentovai
2013/12/06 15:35:12
std::numeric_limits<int32>::max() instead of kint3
Robert Sesek
2013/12/09 19:52:09
There seems to be a preference for the constant wh
|
+ return static_cast<int32>(str.length()); |
+} |
+ |
+} // namespace |
+ |
+void EscapeJSONString(const StringPiece& str, |
bool put_in_quotes, |
- std::string* dst) { |
+ std::string* dest) { |
if (put_in_quotes) |
- dst->push_back('"'); |
- |
- for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { |
- typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; |
- if (!JsonSingleEscapeChar(c, dst)) { |
- if (c < 32 || c > 126 || c == '<' || c == '>') { |
- // 1. Escaping <, > to prevent script execution. |
- // 2. Technically, we could also pass through c > 126 as UTF8, but this |
- // is also optional. It would also be a pain to implement here. |
- unsigned int as_uint = static_cast<unsigned int>(c); |
- base::StringAppendF(dst, "\\u%04X", as_uint); |
+ dest->push_back('"'); |
+ |
+ const int32 length = StringLength(str); |
+ for (int32 i = 0; i < length; ++i) { |
+ uint32 code_point; |
+ if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) |
+ code_point = kReplacementCodePoint; |
+ |
+ if (EscapeCodePoint(code_point, dest)) |
+ continue; |
+ |
+ // Escape non-printing characters. |
+ if (code_point < 32) |
+ base::StringAppendF(dest, kU16EscapeFormat, code_point); |
+ else |
+ WriteUnicodeCharacter(code_point, dest); |
+ } |
+ |
+ if (put_in_quotes) |
+ dest->push_back('"'); |
+} |
+ |
+void EscapeJSONString(const StringPiece16& str, |
+ bool put_in_quotes, |
+ std::string* dest) { |
+ if (put_in_quotes) |
+ dest->push_back('"'); |
+ |
+ const int32 length = StringLength(str); |
+ for (int32 i = 0; i < length; ++i) { |
+ uint32 code_point; |
+ if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) |
+ code_point = kReplacementCodePoint; |
jungshik at Google
2013/12/07 13:16:59
ReadUnicodeCharacter passes through non-character
Robert Sesek
2013/12/09 19:52:09
base::StringValue performs a DCHECK(IsStringUTF8()
|
+ |
+ if (EscapeCodePoint(code_point, dest)) |
+ continue; |
+ |
+ if (code_point < 32 || code_point > 126) { |
+ if (CBU16_LENGTH(code_point) == 1) { |
+ base::StringAppendF(dest, kU16EscapeFormat, code_point); |
} else { |
- unsigned char ascii = static_cast<unsigned char>(*it); |
- dst->push_back(ascii); |
+ base::StringAppendF(dest, kU16EscapeFormat, CBU16_LEAD(code_point)); |
+ base::StringAppendF(dest, kU16EscapeFormat, CBU16_TRAIL(code_point)); |
} |
+ } else { |
+ dest->push_back(static_cast<char>(code_point)); |
} |
} |
if (put_in_quotes) |
- dst->push_back('"'); |
+ dest->push_back('"'); |
} |
-} // namespace |
- |
-void JsonDoubleQuote(const StringPiece& str, |
- bool put_in_quotes, |
- std::string* dst) { |
- JsonDoubleQuoteT(str, put_in_quotes, dst); |
+std::string GetQuotedJSONString(const StringPiece& str) { |
+ std::string dest; |
+ EscapeJSONString(str, true, &dest); |
+ return dest; |
} |
-std::string GetDoubleQuotedJson(const StringPiece& str) { |
- std::string dst; |
- JsonDoubleQuote(str, true, &dst); |
- return dst; |
+std::string GetQuotedJSONString(const StringPiece16& str) { |
+ std::string dest; |
+ EscapeJSONString(str, true, &dest); |
+ return dest; |
} |
-void JsonDoubleQuote(const StringPiece16& str, |
- bool put_in_quotes, |
- std::string* dst) { |
- JsonDoubleQuoteT(str, put_in_quotes, dst); |
-} |
+std::string EscapeBytesAsInvalidJSONString(const StringPiece& str, |
+ bool put_in_quotes) { |
+ std::string dest; |
+ |
+ if (put_in_quotes) |
+ dest.push_back('"'); |
+ |
+ for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) { |
+ ToUnsigned<StringPiece::value_type>::Unsigned c = *it; |
+ if (EscapeCodePoint(c, &dest)) |
+ continue; |
+ |
+ if (c < 32 || c > 126) |
+ base::StringAppendF(&dest, kU16EscapeFormat, c); |
+ else |
+ dest.push_back(*it); |
+ } |
+ |
+ if (put_in_quotes) |
+ dest.push_back('"'); |
-std::string GetDoubleQuotedJson(const StringPiece16& str) { |
- std::string dst; |
- JsonDoubleQuote(str, true, &dst); |
- return dst; |
+ return dest; |
} |
} // namespace base |