Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(195)

Side by Side Diff: base/json/string_escape.cc

Issue 100823007: Stop doing unnecessary UTF-8 to UTF-16 conversions in JSONWriter. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix ChromeOS page encodings Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/json/string_escape.h ('k') | base/json/string_escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/json/string_escape.h" 5 #include "base/json/string_escape.h"
6 6
7 #include <string> 7 #include <string>
8 8
9 #include "base/strings/string_util.h" 9 #include "base/strings/string_util.h"
10 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
11 #include "base/strings/utf_string_conversion_utils.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/third_party/icu/icu_utf.h"
11 14
12 namespace base { 15 namespace base {
13 16
14 namespace { 17 namespace {
15 18
16 // Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, 19 // Format string for printing a \uXXXX escape sequence.
17 // returns true and appends the escape sequence to |dst|. This isn't required 20 const char kU16EscapeFormat[] = "\\u%04X";
18 // by the spec, but it's more readable by humans than the \uXXXX alternatives. 21
19 template<typename CHAR> 22 // The code point to output for an invalid input code unit.
20 static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { 23 const uint32 kReplacementCodePoint = 0xFFFD;
24
25 // Used below in EscapeSpecialCodePoint().
26 COMPILE_ASSERT('<' == 0x3C, less_than_sign_is_0x3c);
27
28 // Try to escape the |code_point| if it is a known special character. If
29 // successful, returns true and appends the escape sequence to |dest|. This
30 // isn't required by the spec, but it's more readable by humans.
31 bool EscapeSpecialCodePoint(uint32 code_point, std::string* dest) {
21 // WARNING: if you add a new case here, you need to update the reader as well. 32 // WARNING: if you add a new case here, you need to update the reader as well.
22 // Note: \v is in the reader, but not here since the JSON spec doesn't 33 // Note: \v is in the reader, but not here since the JSON spec doesn't
23 // allow it. 34 // allow it.
24 switch (c) { 35 switch (code_point) {
25 case '\b': 36 case '\b':
26 dst->append("\\b"); 37 dest->append("\\b");
27 break; 38 break;
28 case '\f': 39 case '\f':
29 dst->append("\\f"); 40 dest->append("\\f");
30 break; 41 break;
31 case '\n': 42 case '\n':
32 dst->append("\\n"); 43 dest->append("\\n");
33 break; 44 break;
34 case '\r': 45 case '\r':
35 dst->append("\\r"); 46 dest->append("\\r");
36 break; 47 break;
37 case '\t': 48 case '\t':
38 dst->append("\\t"); 49 dest->append("\\t");
39 break; 50 break;
40 case '\\': 51 case '\\':
41 dst->append("\\\\"); 52 dest->append("\\\\");
42 break; 53 break;
43 case '"': 54 case '"':
44 dst->append("\\\""); 55 dest->append("\\\"");
56 break;
57 // Escape < to prevent script execution; escaping > is not necessary and
58 // not doing so saves a few bytes.
59 case '<':
60 dest->append("\\u003C");
45 break; 61 break;
46 default: 62 default:
47 return false; 63 return false;
48 } 64 }
49 return true; 65 return true;
50 } 66 }
51 67
52 template <class STR> 68 template <typename S>
53 void JsonDoubleQuoteT(const STR& str, 69 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
54 bool put_in_quotes, 70 bool did_replacement = false;
55 std::string* dst) { 71
56 if (put_in_quotes) 72 if (put_in_quotes)
57 dst->push_back('"'); 73 dest->push_back('"');
58 74
59 for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { 75 // Casting is necessary because ICU uses int32. Try and do so safely.
60 typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; 76 CHECK_LE(str.length(), static_cast<size_t>(kint32max));
61 if (!JsonSingleEscapeChar(c, dst)) { 77 const int32 length = static_cast<int32>(str.length());
62 if (c < 32 || c > 126 || c == '<' || c == '>') { 78
63 // 1. Escaping <, > to prevent script execution. 79 for (int32 i = 0; i < length; ++i) {
64 // 2. Technically, we could also pass through c > 126 as UTF8, but this 80 uint32 code_point;
65 // is also optional. It would also be a pain to implement here. 81 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) {
66 unsigned int as_uint = static_cast<unsigned int>(c); 82 code_point = kReplacementCodePoint;
67 base::StringAppendF(dst, "\\u%04X", as_uint); 83 did_replacement = true;
68 } else {
69 unsigned char ascii = static_cast<unsigned char>(*it);
70 dst->push_back(ascii);
71 }
72 } 84 }
85
86 if (EscapeSpecialCodePoint(code_point, dest))
87 continue;
88
89 // Escape non-printing characters.
90 if (code_point < 32)
91 base::StringAppendF(dest, kU16EscapeFormat, code_point);
92 else
93 WriteUnicodeCharacter(code_point, dest);
73 } 94 }
74 95
75 if (put_in_quotes) 96 if (put_in_quotes)
76 dst->push_back('"'); 97 dest->push_back('"');
98
99 return !did_replacement;
77 } 100 }
78 101
79 } // namespace 102 } // namespace
80 103
81 void JsonDoubleQuote(const StringPiece& str, 104 bool EscapeJSONString(const StringPiece& str,
82 bool put_in_quotes, 105 bool put_in_quotes,
83 std::string* dst) { 106 std::string* dest) {
84 JsonDoubleQuoteT(str, put_in_quotes, dst); 107 return EscapeJSONStringImpl(str, put_in_quotes, dest);
85 } 108 }
86 109
87 std::string GetDoubleQuotedJson(const StringPiece& str) { 110 bool EscapeJSONString(const StringPiece16& str,
88 std::string dst; 111 bool put_in_quotes,
89 JsonDoubleQuote(str, true, &dst); 112 std::string* dest) {
90 return dst; 113 return EscapeJSONStringImpl(str, put_in_quotes, dest);
91 } 114 }
92 115
93 void JsonDoubleQuote(const StringPiece16& str, 116 std::string GetQuotedJSONString(const StringPiece& str) {
94 bool put_in_quotes, 117 std::string dest;
95 std::string* dst) { 118 bool ok = EscapeJSONStringImpl(str, true, &dest);
96 JsonDoubleQuoteT(str, put_in_quotes, dst); 119 DCHECK(ok);
120 return dest;
97 } 121 }
98 122
99 std::string GetDoubleQuotedJson(const StringPiece16& str) { 123 std::string GetQuotedJSONString(const StringPiece16& str) {
100 std::string dst; 124 std::string dest;
101 JsonDoubleQuote(str, true, &dst); 125 bool ok = EscapeJSONStringImpl(str, true, &dest);
102 return dst; 126 DCHECK(ok);
127 return dest;
128 }
129
130 std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
131 bool put_in_quotes) {
132 std::string dest;
133
134 if (put_in_quotes)
135 dest.push_back('"');
136
137 for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
138 ToUnsigned<StringPiece::value_type>::Unsigned c = *it;
139 if (EscapeSpecialCodePoint(c, &dest))
140 continue;
141
142 if (c < 32 || c > 126)
143 base::StringAppendF(&dest, kU16EscapeFormat, c);
144 else
145 dest.push_back(*it);
146 }
147
148 if (put_in_quotes)
149 dest.push_back('"');
150
151 return dest;
103 } 152 }
104 153
105 } // namespace base 154 } // namespace base
OLDNEW
« no previous file with comments | « base/json/string_escape.h ('k') | base/json/string_escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698