Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2342)

Unified Diff: base/json/string_escape.cc

Issue 100823007: Stop doing unnecessary UTF-8 to UTF-16 conversions in JSONWriter. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: '' Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/json/string_escape.cc
diff --git a/base/json/string_escape.cc b/base/json/string_escape.cc
index 10ea6707465c548a983696475caa4e5f50297e17..93b81fe32386cb2d2509439c1e6ef02f0b606f26 100644
--- a/base/json/string_escape.cc
+++ b/base/json/string_escape.cc
@@ -8,40 +8,55 @@
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
+#include "base/strings/utf_string_conversion_utils.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/third_party/icu/icu_utf.h"
namespace base {
namespace {
-// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful,
-// returns true and appends the escape sequence to |dst|. This isn't required
-// by the spec, but it's more readable by humans than the \uXXXX alternatives.
-template<typename CHAR>
-static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) {
+// Format string for printing a \uXXXX escape sequence.
+const char kU16EscapeFormat[] = "\\u%04X";
+
+// The code point to output for an invalid input code unit.
+const uint32 kReplacementCodePoint = 0xFFFD;
+
+// Try to escape the |code_point| if it is a known special character. If
+// successful, returns true and appends the escape sequence to |dest|. This
+// isn't required by the spec, but it's more readable by humans.
+bool EscapeCodePoint(uint32 code_point, std::string* dest) {
Mark Mentovai 2013/12/06 15:35:12 This name makes it sound like it can escape any co
jungshik at Google 2013/12/07 13:16:59 Agree. I had to come back up here to remind me wha
Robert Sesek 2013/12/09 19:52:09 Done.
// WARNING: if you add a new case here, you need to update the reader as well.
// Note: \v is in the reader, but not here since the JSON spec doesn't
// allow it.
- switch (c) {
+ switch (code_point) {
case '\b':
- dst->append("\\b");
+ dest->append("\\b");
break;
case '\f':
- dst->append("\\f");
+ dest->append("\\f");
break;
case '\n':
- dst->append("\\n");
+ dest->append("\\n");
break;
case '\r':
- dst->append("\\r");
+ dest->append("\\r");
break;
case '\t':
- dst->append("\\t");
+ dest->append("\\t");
break;
case '\\':
- dst->append("\\\\");
+ dest->append("\\\\");
break;
case '"':
- dst->append("\\\"");
+ dest->append("\\\"");
+ break;
+ // Escape <, > to prevent script execution.
Mark Mentovai 2013/12/06 15:35:12 You really only need < for this, you can save a co
Robert Sesek 2013/12/09 19:52:09 Done.
+ case '<':
+ dest->append("\\u003C");
+ break;
+ case '>':
+ dest->append("\\u003E");
break;
default:
return false;
@@ -49,57 +64,106 @@ static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) {
return true;
}
-template <class STR>
-void JsonDoubleQuoteT(const STR& str,
+template <typename S>
+int32 StringLength(const S& str) {
+ // Casting is necessary because ICU uses int32. Try and do so safely.
+ CHECK_LE(str.length(), static_cast<size_t>(kint32max));
Mark Mentovai 2013/12/06 15:35:12 std::numeric_limits<int32>::max() instead of kint3
Robert Sesek 2013/12/09 19:52:09 There seems to be a preference for the constant wh
+ return static_cast<int32>(str.length());
+}
+
+} // namespace
+
+void EscapeJSONString(const StringPiece& str,
bool put_in_quotes,
- std::string* dst) {
+ std::string* dest) {
if (put_in_quotes)
- dst->push_back('"');
-
- for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) {
- typename ToUnsigned<typename STR::value_type>::Unsigned c = *it;
- if (!JsonSingleEscapeChar(c, dst)) {
- if (c < 32 || c > 126 || c == '<' || c == '>') {
- // 1. Escaping <, > to prevent script execution.
- // 2. Technically, we could also pass through c > 126 as UTF8, but this
- // is also optional. It would also be a pain to implement here.
- unsigned int as_uint = static_cast<unsigned int>(c);
- base::StringAppendF(dst, "\\u%04X", as_uint);
+ dest->push_back('"');
+
+ const int32 length = StringLength(str);
+ for (int32 i = 0; i < length; ++i) {
+ uint32 code_point;
+ if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point))
+ code_point = kReplacementCodePoint;
+
+ if (EscapeCodePoint(code_point, dest))
+ continue;
+
+ // Escape non-printing characters.
+ if (code_point < 32)
+ base::StringAppendF(dest, kU16EscapeFormat, code_point);
+ else
+ WriteUnicodeCharacter(code_point, dest);
+ }
+
+ if (put_in_quotes)
+ dest->push_back('"');
+}
+
+void EscapeJSONString(const StringPiece16& str,
+ bool put_in_quotes,
+ std::string* dest) {
+ if (put_in_quotes)
+ dest->push_back('"');
+
+ const int32 length = StringLength(str);
+ for (int32 i = 0; i < length; ++i) {
+ uint32 code_point;
+ if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point))
+ code_point = kReplacementCodePoint;
jungshik at Google 2013/12/07 13:16:59 ReadUnicodeCharacter passes through non-character
Robert Sesek 2013/12/09 19:52:09 base::StringValue performs a DCHECK(IsStringUTF8()
+
+ if (EscapeCodePoint(code_point, dest))
+ continue;
+
+ if (code_point < 32 || code_point > 126) {
+ if (CBU16_LENGTH(code_point) == 1) {
+ base::StringAppendF(dest, kU16EscapeFormat, code_point);
} else {
- unsigned char ascii = static_cast<unsigned char>(*it);
- dst->push_back(ascii);
+ base::StringAppendF(dest, kU16EscapeFormat, CBU16_LEAD(code_point));
+ base::StringAppendF(dest, kU16EscapeFormat, CBU16_TRAIL(code_point));
}
+ } else {
+ dest->push_back(static_cast<char>(code_point));
}
}
if (put_in_quotes)
- dst->push_back('"');
+ dest->push_back('"');
}
-} // namespace
-
-void JsonDoubleQuote(const StringPiece& str,
- bool put_in_quotes,
- std::string* dst) {
- JsonDoubleQuoteT(str, put_in_quotes, dst);
+std::string GetQuotedJSONString(const StringPiece& str) {
+ std::string dest;
+ EscapeJSONString(str, true, &dest);
+ return dest;
}
-std::string GetDoubleQuotedJson(const StringPiece& str) {
- std::string dst;
- JsonDoubleQuote(str, true, &dst);
- return dst;
+std::string GetQuotedJSONString(const StringPiece16& str) {
+ std::string dest;
+ EscapeJSONString(str, true, &dest);
+ return dest;
}
-void JsonDoubleQuote(const StringPiece16& str,
- bool put_in_quotes,
- std::string* dst) {
- JsonDoubleQuoteT(str, put_in_quotes, dst);
-}
+std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
+ bool put_in_quotes) {
+ std::string dest;
+
+ if (put_in_quotes)
+ dest.push_back('"');
+
+ for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
+ ToUnsigned<StringPiece::value_type>::Unsigned c = *it;
+ if (EscapeCodePoint(c, &dest))
+ continue;
+
+ if (c < 32 || c > 126)
+ base::StringAppendF(&dest, kU16EscapeFormat, c);
+ else
+ dest.push_back(*it);
+ }
+
+ if (put_in_quotes)
+ dest.push_back('"');
-std::string GetDoubleQuotedJson(const StringPiece16& str) {
- std::string dst;
- JsonDoubleQuote(str, true, &dst);
- return dst;
+ return dest;
}
} // namespace base

Powered by Google App Engine
This is Rietveld 408576698