Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(74)

Side by Side Diff: base/json/string_escape.cc

Issue 100823007: Stop doing unnecessary UTF-8 to UTF-16 conversions in JSONWriter. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: '' Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/json/string_escape.h" 5 #include "base/json/string_escape.h"
6 6
7 #include <string> 7 #include <string>
8 8
9 #include "base/strings/string_util.h" 9 #include "base/strings/string_util.h"
10 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
11 #include "base/strings/utf_string_conversion_utils.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/third_party/icu/icu_utf.h"
11 14
12 namespace base { 15 namespace base {
13 16
14 namespace { 17 namespace {
15 18
16 // Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, 19 // Format string for printing a \uXXXX escape sequence.
17 // returns true and appends the escape sequence to |dst|. This isn't required 20 const char kU16EscapeFormat[] = "\\u%04X";
18 // by the spec, but it's more readable by humans than the \uXXXX alternatives. 21
19 template<typename CHAR> 22 // The code point to output for an invalid input code unit.
20 static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { 23 const uint32 kReplacementCodePoint = 0xFFFD;
24
25 // Try to escape the |code_point| if it is a known special character. If
26 // successful, returns true and appends the escape sequence to |dest|. This
27 // isn't required by the spec, but it's more readable by humans.
28 bool EscapeCodePoint(uint32 code_point, std::string* dest) {
Mark Mentovai 2013/12/06 15:35:12 This name makes it sound like it can escape any co
jungshik at Google 2013/12/07 13:16:59 Agree. I had to come back up here to remind me wha
Robert Sesek 2013/12/09 19:52:09 Done.
21 // WARNING: if you add a new case here, you need to update the reader as well. 29 // WARNING: if you add a new case here, you need to update the reader as well.
22 // Note: \v is in the reader, but not here since the JSON spec doesn't 30 // Note: \v is in the reader, but not here since the JSON spec doesn't
23 // allow it. 31 // allow it.
24 switch (c) { 32 switch (code_point) {
25 case '\b': 33 case '\b':
26 dst->append("\\b"); 34 dest->append("\\b");
27 break; 35 break;
28 case '\f': 36 case '\f':
29 dst->append("\\f"); 37 dest->append("\\f");
30 break; 38 break;
31 case '\n': 39 case '\n':
32 dst->append("\\n"); 40 dest->append("\\n");
33 break; 41 break;
34 case '\r': 42 case '\r':
35 dst->append("\\r"); 43 dest->append("\\r");
36 break; 44 break;
37 case '\t': 45 case '\t':
38 dst->append("\\t"); 46 dest->append("\\t");
39 break; 47 break;
40 case '\\': 48 case '\\':
41 dst->append("\\\\"); 49 dest->append("\\\\");
42 break; 50 break;
43 case '"': 51 case '"':
44 dst->append("\\\""); 52 dest->append("\\\"");
53 break;
54 // Escape <, > to prevent script execution.
Mark Mentovai 2013/12/06 15:35:12 You really only need < for this, you can save a co
Robert Sesek 2013/12/09 19:52:09 Done.
55 case '<':
56 dest->append("\\u003C");
57 break;
58 case '>':
59 dest->append("\\u003E");
45 break; 60 break;
46 default: 61 default:
47 return false; 62 return false;
48 } 63 }
49 return true; 64 return true;
50 } 65 }
51 66
52 template <class STR> 67 template <typename S>
53 void JsonDoubleQuoteT(const STR& str, 68 int32 StringLength(const S& str) {
69 // Casting is necessary because ICU uses int32. Try and do so safely.
70 CHECK_LE(str.length(), static_cast<size_t>(kint32max));
Mark Mentovai 2013/12/06 15:35:12 std::numeric_limits<int32>::max() instead of kint3
Robert Sesek 2013/12/09 19:52:09 There seems to be a preference for the constant wh
71 return static_cast<int32>(str.length());
72 }
73
74 } // namespace
75
76 void EscapeJSONString(const StringPiece& str,
54 bool put_in_quotes, 77 bool put_in_quotes,
55 std::string* dst) { 78 std::string* dest) {
56 if (put_in_quotes) 79 if (put_in_quotes)
57 dst->push_back('"'); 80 dest->push_back('"');
58 81
59 for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { 82 const int32 length = StringLength(str);
60 typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; 83 for (int32 i = 0; i < length; ++i) {
61 if (!JsonSingleEscapeChar(c, dst)) { 84 uint32 code_point;
62 if (c < 32 || c > 126 || c == '<' || c == '>') { 85 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point))
63 // 1. Escaping <, > to prevent script execution. 86 code_point = kReplacementCodePoint;
64 // 2. Technically, we could also pass through c > 126 as UTF8, but this 87
65 // is also optional. It would also be a pain to implement here. 88 if (EscapeCodePoint(code_point, dest))
66 unsigned int as_uint = static_cast<unsigned int>(c); 89 continue;
67 base::StringAppendF(dst, "\\u%04X", as_uint); 90
91 // Escape non-printing characters.
92 if (code_point < 32)
93 base::StringAppendF(dest, kU16EscapeFormat, code_point);
94 else
95 WriteUnicodeCharacter(code_point, dest);
96 }
97
98 if (put_in_quotes)
99 dest->push_back('"');
100 }
101
102 void EscapeJSONString(const StringPiece16& str,
103 bool put_in_quotes,
104 std::string* dest) {
105 if (put_in_quotes)
106 dest->push_back('"');
107
108 const int32 length = StringLength(str);
109 for (int32 i = 0; i < length; ++i) {
110 uint32 code_point;
111 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point))
112 code_point = kReplacementCodePoint;
jungshik at Google 2013/12/07 13:16:59 ReadUnicodeCharacter passes through non-character
Robert Sesek 2013/12/09 19:52:09 base::StringValue performs a DCHECK(IsStringUTF8()
113
114 if (EscapeCodePoint(code_point, dest))
115 continue;
116
117 if (code_point < 32 || code_point > 126) {
118 if (CBU16_LENGTH(code_point) == 1) {
119 base::StringAppendF(dest, kU16EscapeFormat, code_point);
68 } else { 120 } else {
69 unsigned char ascii = static_cast<unsigned char>(*it); 121 base::StringAppendF(dest, kU16EscapeFormat, CBU16_LEAD(code_point));
70 dst->push_back(ascii); 122 base::StringAppendF(dest, kU16EscapeFormat, CBU16_TRAIL(code_point));
71 } 123 }
124 } else {
125 dest->push_back(static_cast<char>(code_point));
72 } 126 }
73 } 127 }
74 128
75 if (put_in_quotes) 129 if (put_in_quotes)
76 dst->push_back('"'); 130 dest->push_back('"');
77 } 131 }
78 132
79 } // namespace 133 std::string GetQuotedJSONString(const StringPiece& str) {
80 134 std::string dest;
81 void JsonDoubleQuote(const StringPiece& str, 135 EscapeJSONString(str, true, &dest);
82 bool put_in_quotes, 136 return dest;
83 std::string* dst) {
84 JsonDoubleQuoteT(str, put_in_quotes, dst);
85 } 137 }
86 138
87 std::string GetDoubleQuotedJson(const StringPiece& str) { 139 std::string GetQuotedJSONString(const StringPiece16& str) {
88 std::string dst; 140 std::string dest;
89 JsonDoubleQuote(str, true, &dst); 141 EscapeJSONString(str, true, &dest);
90 return dst; 142 return dest;
91 } 143 }
92 144
93 void JsonDoubleQuote(const StringPiece16& str, 145 std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
94 bool put_in_quotes, 146 bool put_in_quotes) {
95 std::string* dst) { 147 std::string dest;
96 JsonDoubleQuoteT(str, put_in_quotes, dst);
97 }
98 148
99 std::string GetDoubleQuotedJson(const StringPiece16& str) { 149 if (put_in_quotes)
100 std::string dst; 150 dest.push_back('"');
101 JsonDoubleQuote(str, true, &dst); 151
102 return dst; 152 for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
153 ToUnsigned<StringPiece::value_type>::Unsigned c = *it;
154 if (EscapeCodePoint(c, &dest))
155 continue;
156
157 if (c < 32 || c > 126)
158 base::StringAppendF(&dest, kU16EscapeFormat, c);
159 else
160 dest.push_back(*it);
161 }
162
163 if (put_in_quotes)
164 dest.push_back('"');
165
166 return dest;
103 } 167 }
104 168
105 } // namespace base 169 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698