Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2678)

Unified Diff: base/json/string_escape.cc

Issue 100823007: Stop doing unnecessary UTF-8 to UTF-16 conversions in JSONWriter. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix ChromeOS page encodings Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/json/string_escape.h ('k') | base/json/string_escape_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/json/string_escape.cc
diff --git a/base/json/string_escape.cc b/base/json/string_escape.cc
index 10ea6707465c548a983696475caa4e5f50297e17..a3b0735191ee0dfc1b40d61862f3015570855297 100644
--- a/base/json/string_escape.cc
+++ b/base/json/string_escape.cc
@@ -8,40 +8,56 @@
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
+#include "base/strings/utf_string_conversion_utils.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/third_party/icu/icu_utf.h"
namespace base {
namespace {
-// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful,
-// returns true and appends the escape sequence to |dst|. This isn't required
-// by the spec, but it's more readable by humans than the \uXXXX alternatives.
-template<typename CHAR>
-static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) {
+// Format string for printing a \uXXXX escape sequence.
+const char kU16EscapeFormat[] = "\\u%04X";
+
+// The code point to output for an invalid input code unit.
+const uint32 kReplacementCodePoint = 0xFFFD;
+
+// Used below in EscapeSpecialCodePoint().
+COMPILE_ASSERT('<' == 0x3C, less_than_sign_is_0x3c);
+
+// Try to escape the |code_point| if it is a known special character. If
+// successful, returns true and appends the escape sequence to |dest|. This
+// isn't required by the spec, but it's more readable by humans.
+bool EscapeSpecialCodePoint(uint32 code_point, std::string* dest) {
// WARNING: if you add a new case here, you need to update the reader as well.
// Note: \v is in the reader, but not here since the JSON spec doesn't
// allow it.
- switch (c) {
+ switch (code_point) {
case '\b':
- dst->append("\\b");
+ dest->append("\\b");
break;
case '\f':
- dst->append("\\f");
+ dest->append("\\f");
break;
case '\n':
- dst->append("\\n");
+ dest->append("\\n");
break;
case '\r':
- dst->append("\\r");
+ dest->append("\\r");
break;
case '\t':
- dst->append("\\t");
+ dest->append("\\t");
break;
case '\\':
- dst->append("\\\\");
+ dest->append("\\\\");
break;
case '"':
- dst->append("\\\"");
+ dest->append("\\\"");
+ break;
+ // Escape < to prevent script execution; escaping > is not necessary and
+ // not doing so saves a few bytes.
+ case '<':
+ dest->append("\\u003C");
break;
default:
return false;
@@ -49,57 +65,90 @@ static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) {
return true;
}
-template <class STR>
-void JsonDoubleQuoteT(const STR& str,
- bool put_in_quotes,
- std::string* dst) {
+template <typename S>
+bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
+ bool did_replacement = false;
+
if (put_in_quotes)
- dst->push_back('"');
-
- for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) {
- typename ToUnsigned<typename STR::value_type>::Unsigned c = *it;
- if (!JsonSingleEscapeChar(c, dst)) {
- if (c < 32 || c > 126 || c == '<' || c == '>') {
- // 1. Escaping <, > to prevent script execution.
- // 2. Technically, we could also pass through c > 126 as UTF8, but this
- // is also optional. It would also be a pain to implement here.
- unsigned int as_uint = static_cast<unsigned int>(c);
- base::StringAppendF(dst, "\\u%04X", as_uint);
- } else {
- unsigned char ascii = static_cast<unsigned char>(*it);
- dst->push_back(ascii);
- }
+ dest->push_back('"');
+
+ // Casting is necessary because ICU uses int32. Try to do so safely.
+ CHECK_LE(str.length(), static_cast<size_t>(kint32max));
+ const int32 length = static_cast<int32>(str.length());
+
+ for (int32 i = 0; i < length; ++i) {
+ uint32 code_point;
+ if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) {
+ code_point = kReplacementCodePoint;
+ did_replacement = true;
}
+
+ if (EscapeSpecialCodePoint(code_point, dest))
+ continue;
+
+ // Escape non-printing characters.
+ if (code_point < 32)
+ base::StringAppendF(dest, kU16EscapeFormat, code_point);
+ else
+ WriteUnicodeCharacter(code_point, dest);
}
if (put_in_quotes)
- dst->push_back('"');
+ dest->push_back('"');
+
+ return !did_replacement;
}
} // namespace
-void JsonDoubleQuote(const StringPiece& str,
- bool put_in_quotes,
- std::string* dst) {
- JsonDoubleQuoteT(str, put_in_quotes, dst);
+bool EscapeJSONString(const StringPiece& str,
+ bool put_in_quotes,
+ std::string* dest) {
+ return EscapeJSONStringImpl(str, put_in_quotes, dest);
}
-std::string GetDoubleQuotedJson(const StringPiece& str) {
- std::string dst;
- JsonDoubleQuote(str, true, &dst);
- return dst;
+bool EscapeJSONString(const StringPiece16& str,
+ bool put_in_quotes,
+ std::string* dest) {
+ return EscapeJSONStringImpl(str, put_in_quotes, dest);
+}
+
+std::string GetQuotedJSONString(const StringPiece& str) {
+ std::string dest;
+ bool ok = EscapeJSONStringImpl(str, true, &dest);
+ DCHECK(ok);
+ return dest;
}
-void JsonDoubleQuote(const StringPiece16& str,
- bool put_in_quotes,
- std::string* dst) {
- JsonDoubleQuoteT(str, put_in_quotes, dst);
+std::string GetQuotedJSONString(const StringPiece16& str) {
+ std::string dest;
+ bool ok = EscapeJSONStringImpl(str, true, &dest);
+ DCHECK(ok);
+ return dest;
}
-std::string GetDoubleQuotedJson(const StringPiece16& str) {
- std::string dst;
- JsonDoubleQuote(str, true, &dst);
- return dst;
+std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
+ bool put_in_quotes) {
+ std::string dest;
+
+ if (put_in_quotes)
+ dest.push_back('"');
+
+ for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
+ ToUnsigned<StringPiece::value_type>::Unsigned c = *it;
+ if (EscapeSpecialCodePoint(c, &dest))
+ continue;
+
+ if (c < 32 || c > 126)
+ base::StringAppendF(&dest, kU16EscapeFormat, c);
+ else
+ dest.push_back(*it);
+ }
+
+ if (put_in_quotes)
+ dest.push_back('"');
+
+ return dest;
}
} // namespace base
« no previous file with comments | « base/json/string_escape.h ('k') | base/json/string_escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698