Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/json/string_escape.h" | 5 #include "base/json/string_escape.h" |
| 6 | 6 |
| 7 #include <string> | 7 #include <string> |
| 8 | 8 |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "base/strings/stringprintf.h" | 10 #include "base/strings/stringprintf.h" |
| 11 #include "base/strings/utf_string_conversion_utils.h" | |
| 12 #include "base/strings/utf_string_conversions.h" | |
| 13 #include "base/third_party/icu/icu_utf.h" | |
| 11 | 14 |
| 12 namespace base { | 15 namespace base { |
| 13 | 16 |
| 14 namespace { | 17 namespace { |
| 15 | 18 |
| 16 // Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful, | 19 // Format string for printing a \uXXXX escape sequence. |
| 17 // returns true and appends the escape sequence to |dst|. This isn't required | 20 const char kU16EscapeFormat[] = "\\u%04X"; |
| 18 // by the spec, but it's more readable by humans than the \uXXXX alternatives. | 21 |
| 19 template<typename CHAR> | 22 // The code point to output for an invalid input code unit. |
| 20 static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) { | 23 const uint32 kReplacementCodePoint = 0xFFFD; |
| 24 | |
| 25 // Try to escape the |code_point| if it is a known special character. If | |
| 26 // successful, returns true and appends the escape sequence to |dest|. This | |
| 27 // isn't required by the spec, but it's more readable by humans. | |
| 28 bool EscapeCodePoint(uint32 code_point, std::string* dest) { | |
|
Mark Mentovai
2013/12/06 15:35:12
This name makes it sound like it can escape any co
jungshik at Google
2013/12/07 13:16:59
Agree. I had to come back up here to remind me wha
Robert Sesek
2013/12/09 19:52:09
Done.
| |
| 21 // WARNING: if you add a new case here, you need to update the reader as well. | 29 // WARNING: if you add a new case here, you need to update the reader as well. |
| 22 // Note: \v is in the reader, but not here since the JSON spec doesn't | 30 // Note: \v is in the reader, but not here since the JSON spec doesn't |
| 23 // allow it. | 31 // allow it. |
| 24 switch (c) { | 32 switch (code_point) { |
| 25 case '\b': | 33 case '\b': |
| 26 dst->append("\\b"); | 34 dest->append("\\b"); |
| 27 break; | 35 break; |
| 28 case '\f': | 36 case '\f': |
| 29 dst->append("\\f"); | 37 dest->append("\\f"); |
| 30 break; | 38 break; |
| 31 case '\n': | 39 case '\n': |
| 32 dst->append("\\n"); | 40 dest->append("\\n"); |
| 33 break; | 41 break; |
| 34 case '\r': | 42 case '\r': |
| 35 dst->append("\\r"); | 43 dest->append("\\r"); |
| 36 break; | 44 break; |
| 37 case '\t': | 45 case '\t': |
| 38 dst->append("\\t"); | 46 dest->append("\\t"); |
| 39 break; | 47 break; |
| 40 case '\\': | 48 case '\\': |
| 41 dst->append("\\\\"); | 49 dest->append("\\\\"); |
| 42 break; | 50 break; |
| 43 case '"': | 51 case '"': |
| 44 dst->append("\\\""); | 52 dest->append("\\\""); |
| 53 break; | |
| 54 // Escape <, > to prevent script execution. | |
|
Mark Mentovai
2013/12/06 15:35:12
You really only need < for this, you can save a co
Robert Sesek
2013/12/09 19:52:09
Done.
| |
| 55 case '<': | |
| 56 dest->append("\\u003C"); | |
| 57 break; | |
| 58 case '>': | |
| 59 dest->append("\\u003E"); | |
| 45 break; | 60 break; |
| 46 default: | 61 default: |
| 47 return false; | 62 return false; |
| 48 } | 63 } |
| 49 return true; | 64 return true; |
| 50 } | 65 } |
| 51 | 66 |
| 52 template <class STR> | 67 template <typename S> |
| 53 void JsonDoubleQuoteT(const STR& str, | 68 int32 StringLength(const S& str) { |
| 69 // Casting is necessary because ICU uses int32. Try and do so safely. | |
| 70 CHECK_LE(str.length(), static_cast<size_t>(kint32max)); | |
|
Mark Mentovai
2013/12/06 15:35:12
std::numeric_limits<int32>::max() instead of kint3
Robert Sesek
2013/12/09 19:52:09
There seems to be a preference for the constant wh
| |
| 71 return static_cast<int32>(str.length()); | |
| 72 } | |
| 73 | |
| 74 } // namespace | |
| 75 | |
| 76 void EscapeJSONString(const StringPiece& str, | |
| 54 bool put_in_quotes, | 77 bool put_in_quotes, |
| 55 std::string* dst) { | 78 std::string* dest) { |
| 56 if (put_in_quotes) | 79 if (put_in_quotes) |
| 57 dst->push_back('"'); | 80 dest->push_back('"'); |
| 58 | 81 |
| 59 for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) { | 82 const int32 length = StringLength(str); |
| 60 typename ToUnsigned<typename STR::value_type>::Unsigned c = *it; | 83 for (int32 i = 0; i < length; ++i) { |
| 61 if (!JsonSingleEscapeChar(c, dst)) { | 84 uint32 code_point; |
| 62 if (c < 32 || c > 126 || c == '<' || c == '>') { | 85 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) |
| 63 // 1. Escaping <, > to prevent script execution. | 86 code_point = kReplacementCodePoint; |
| 64 // 2. Technically, we could also pass through c > 126 as UTF8, but this | 87 |
| 65 // is also optional. It would also be a pain to implement here. | 88 if (EscapeCodePoint(code_point, dest)) |
| 66 unsigned int as_uint = static_cast<unsigned int>(c); | 89 continue; |
| 67 base::StringAppendF(dst, "\\u%04X", as_uint); | 90 |
| 91 // Escape non-printing characters. | |
| 92 if (code_point < 32) | |
| 93 base::StringAppendF(dest, kU16EscapeFormat, code_point); | |
| 94 else | |
| 95 WriteUnicodeCharacter(code_point, dest); | |
| 96 } | |
| 97 | |
| 98 if (put_in_quotes) | |
| 99 dest->push_back('"'); | |
| 100 } | |
| 101 | |
| 102 void EscapeJSONString(const StringPiece16& str, | |
| 103 bool put_in_quotes, | |
| 104 std::string* dest) { | |
| 105 if (put_in_quotes) | |
| 106 dest->push_back('"'); | |
| 107 | |
| 108 const int32 length = StringLength(str); | |
| 109 for (int32 i = 0; i < length; ++i) { | |
| 110 uint32 code_point; | |
| 111 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) | |
| 112 code_point = kReplacementCodePoint; | |
|
jungshik at Google
2013/12/07 13:16:59
ReadUnicodeCharacter passes through non-character
Robert Sesek
2013/12/09 19:52:09
base::StringValue performs a DCHECK(IsStringUTF8()
| |
| 113 | |
| 114 if (EscapeCodePoint(code_point, dest)) | |
| 115 continue; | |
| 116 | |
| 117 if (code_point < 32 || code_point > 126) { | |
| 118 if (CBU16_LENGTH(code_point) == 1) { | |
| 119 base::StringAppendF(dest, kU16EscapeFormat, code_point); | |
| 68 } else { | 120 } else { |
| 69 unsigned char ascii = static_cast<unsigned char>(*it); | 121 base::StringAppendF(dest, kU16EscapeFormat, CBU16_LEAD(code_point)); |
| 70 dst->push_back(ascii); | 122 base::StringAppendF(dest, kU16EscapeFormat, CBU16_TRAIL(code_point)); |
| 71 } | 123 } |
| 124 } else { | |
| 125 dest->push_back(static_cast<char>(code_point)); | |
| 72 } | 126 } |
| 73 } | 127 } |
| 74 | 128 |
| 75 if (put_in_quotes) | 129 if (put_in_quotes) |
| 76 dst->push_back('"'); | 130 dest->push_back('"'); |
| 77 } | 131 } |
| 78 | 132 |
| 79 } // namespace | 133 std::string GetQuotedJSONString(const StringPiece& str) { |
| 80 | 134 std::string dest; |
| 81 void JsonDoubleQuote(const StringPiece& str, | 135 EscapeJSONString(str, true, &dest); |
| 82 bool put_in_quotes, | 136 return dest; |
| 83 std::string* dst) { | |
| 84 JsonDoubleQuoteT(str, put_in_quotes, dst); | |
| 85 } | 137 } |
| 86 | 138 |
| 87 std::string GetDoubleQuotedJson(const StringPiece& str) { | 139 std::string GetQuotedJSONString(const StringPiece16& str) { |
| 88 std::string dst; | 140 std::string dest; |
| 89 JsonDoubleQuote(str, true, &dst); | 141 EscapeJSONString(str, true, &dest); |
| 90 return dst; | 142 return dest; |
| 91 } | 143 } |
| 92 | 144 |
| 93 void JsonDoubleQuote(const StringPiece16& str, | 145 std::string EscapeBytesAsInvalidJSONString(const StringPiece& str, |
| 94 bool put_in_quotes, | 146 bool put_in_quotes) { |
| 95 std::string* dst) { | 147 std::string dest; |
| 96 JsonDoubleQuoteT(str, put_in_quotes, dst); | |
| 97 } | |
| 98 | 148 |
| 99 std::string GetDoubleQuotedJson(const StringPiece16& str) { | 149 if (put_in_quotes) |
| 100 std::string dst; | 150 dest.push_back('"'); |
| 101 JsonDoubleQuote(str, true, &dst); | 151 |
| 102 return dst; | 152 for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) { |
| 153 ToUnsigned<StringPiece::value_type>::Unsigned c = *it; | |
| 154 if (EscapeCodePoint(c, &dest)) | |
| 155 continue; | |
| 156 | |
| 157 if (c < 32 || c > 126) | |
| 158 base::StringAppendF(&dest, kU16EscapeFormat, c); | |
| 159 else | |
| 160 dest.push_back(*it); | |
| 161 } | |
| 162 | |
| 163 if (put_in_quotes) | |
| 164 dest.push_back('"'); | |
| 165 | |
| 166 return dest; | |
| 103 } | 167 } |
| 104 | 168 |
| 105 } // namespace base | 169 } // namespace base |
| OLD | NEW |