| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/strings/utf_string_conversions.h" | |
| 6 | |
| 7 #include "base/strings/string_piece.h" | |
| 8 #include "base/strings/string_util.h" | |
| 9 #include "base/strings/utf_string_conversion_utils.h" | |
| 10 | |
| 11 namespace base { | |
| 12 | |
| 13 namespace { | |
| 14 | |
| 15 // Generalized Unicode converter ----------------------------------------------- | |
| 16 | |
| 17 // Converts the given source Unicode character type to the given destination | |
| 18 // Unicode character type as a STL string. The given input buffer and size | |
| 19 // determine the source, and the given output STL string will be replaced by | |
| 20 // the result. | |
| 21 template<typename SRC_CHAR, typename DEST_STRING> | |
| 22 bool ConvertUnicode(const SRC_CHAR* src, | |
| 23 size_t src_len, | |
| 24 DEST_STRING* output) { | |
| 25 // ICU requires 32-bit numbers. | |
| 26 bool success = true; | |
| 27 int32 src_len32 = static_cast<int32>(src_len); | |
| 28 for (int32 i = 0; i < src_len32; i++) { | |
| 29 uint32 code_point; | |
| 30 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { | |
| 31 WriteUnicodeCharacter(code_point, output); | |
| 32 } else { | |
| 33 WriteUnicodeCharacter(0xFFFD, output); | |
| 34 success = false; | |
| 35 } | |
| 36 } | |
| 37 | |
| 38 return success; | |
| 39 } | |
| 40 | |
| 41 } // namespace | |
| 42 | |
| 43 // UTF-8 <-> Wide -------------------------------------------------------------- | |
| 44 | |
| 45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { | |
| 46 if (IsStringASCII(std::wstring(src, src_len))) { | |
| 47 output->assign(src, src + src_len); | |
| 48 return true; | |
| 49 } else { | |
| 50 PrepareForUTF8Output(src, src_len, output); | |
| 51 return ConvertUnicode(src, src_len, output); | |
| 52 } | |
| 53 } | |
| 54 | |
| 55 std::string WideToUTF8(const std::wstring& wide) { | |
| 56 if (IsStringASCII(wide)) { | |
| 57 return std::string(wide.data(), wide.data() + wide.length()); | |
| 58 } | |
| 59 | |
| 60 std::string ret; | |
| 61 PrepareForUTF8Output(wide.data(), wide.length(), &ret); | |
| 62 ConvertUnicode(wide.data(), wide.length(), &ret); | |
| 63 return ret; | |
| 64 } | |
| 65 | |
| 66 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { | |
| 67 if (IsStringASCII(StringPiece(src, src_len))) { | |
| 68 output->assign(src, src + src_len); | |
| 69 return true; | |
| 70 } else { | |
| 71 PrepareForUTF16Or32Output(src, src_len, output); | |
| 72 return ConvertUnicode(src, src_len, output); | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 std::wstring UTF8ToWide(const StringPiece& utf8) { | |
| 77 if (IsStringASCII(utf8)) { | |
| 78 return std::wstring(utf8.begin(), utf8.end()); | |
| 79 } | |
| 80 | |
| 81 std::wstring ret; | |
| 82 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); | |
| 83 ConvertUnicode(utf8.data(), utf8.length(), &ret); | |
| 84 return ret; | |
| 85 } | |
| 86 | |
| 87 // UTF-16 <-> Wide ------------------------------------------------------------- | |
| 88 | |
| 89 #if defined(WCHAR_T_IS_UTF16) | |
| 90 | |
| 91 // When wide == UTF-16, then conversions are a NOP. | |
| 92 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { | |
| 93 output->assign(src, src_len); | |
| 94 return true; | |
| 95 } | |
| 96 | |
| 97 string16 WideToUTF16(const std::wstring& wide) { | |
| 98 return wide; | |
| 99 } | |
| 100 | |
| 101 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { | |
| 102 output->assign(src, src_len); | |
| 103 return true; | |
| 104 } | |
| 105 | |
| 106 std::wstring UTF16ToWide(const string16& utf16) { | |
| 107 return utf16; | |
| 108 } | |
| 109 | |
| 110 #elif defined(WCHAR_T_IS_UTF32) | |
| 111 | |
| 112 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { | |
| 113 output->clear(); | |
| 114 // Assume that normally we won't have any non-BMP characters so the counts | |
| 115 // will be the same. | |
| 116 output->reserve(src_len); | |
| 117 return ConvertUnicode(src, src_len, output); | |
| 118 } | |
| 119 | |
| 120 string16 WideToUTF16(const std::wstring& wide) { | |
| 121 string16 ret; | |
| 122 WideToUTF16(wide.data(), wide.length(), &ret); | |
| 123 return ret; | |
| 124 } | |
| 125 | |
| 126 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { | |
| 127 output->clear(); | |
| 128 // Assume that normally we won't have any non-BMP characters so the counts | |
| 129 // will be the same. | |
| 130 output->reserve(src_len); | |
| 131 return ConvertUnicode(src, src_len, output); | |
| 132 } | |
| 133 | |
| 134 std::wstring UTF16ToWide(const string16& utf16) { | |
| 135 std::wstring ret; | |
| 136 UTF16ToWide(utf16.data(), utf16.length(), &ret); | |
| 137 return ret; | |
| 138 } | |
| 139 | |
| 140 #endif // defined(WCHAR_T_IS_UTF32) | |
| 141 | |
| 142 // UTF16 <-> UTF8 -------------------------------------------------------------- | |
| 143 | |
| 144 #if defined(WCHAR_T_IS_UTF32) | |
| 145 | |
| 146 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { | |
| 147 if (IsStringASCII(StringPiece(src, src_len))) { | |
| 148 output->assign(src, src + src_len); | |
| 149 return true; | |
| 150 } else { | |
| 151 PrepareForUTF16Or32Output(src, src_len, output); | |
| 152 return ConvertUnicode(src, src_len, output); | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 string16 UTF8ToUTF16(const StringPiece& utf8) { | |
| 157 if (IsStringASCII(utf8)) { | |
| 158 return string16(utf8.begin(), utf8.end()); | |
| 159 } | |
| 160 | |
| 161 string16 ret; | |
| 162 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); | |
| 163 // Ignore the success flag of this call, it will do the best it can for | |
| 164 // invalid input, which is what we want here. | |
| 165 ConvertUnicode(utf8.data(), utf8.length(), &ret); | |
| 166 return ret; | |
| 167 } | |
| 168 | |
| 169 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { | |
| 170 if (IsStringASCII(StringPiece16(src, src_len))) { | |
| 171 output->assign(src, src + src_len); | |
| 172 return true; | |
| 173 } else { | |
| 174 PrepareForUTF8Output(src, src_len, output); | |
| 175 return ConvertUnicode(src, src_len, output); | |
| 176 } | |
| 177 } | |
| 178 | |
| 179 std::string UTF16ToUTF8(const string16& utf16) { | |
| 180 if (IsStringASCII(utf16)) { | |
| 181 return std::string(utf16.begin(), utf16.end()); | |
| 182 } | |
| 183 | |
| 184 std::string ret; | |
| 185 // Ignore the success flag of this call, it will do the best it can for | |
| 186 // invalid input, which is what we want here. | |
| 187 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); | |
| 188 return ret; | |
| 189 } | |
| 190 | |
| 191 #elif defined(WCHAR_T_IS_UTF16) | |
| 192 // Easy case since we can use the "wide" versions we already wrote above. | |
| 193 | |
| 194 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { | |
| 195 return UTF8ToWide(src, src_len, output); | |
| 196 } | |
| 197 | |
| 198 string16 UTF8ToUTF16(const StringPiece& utf8) { | |
| 199 return UTF8ToWide(utf8); | |
| 200 } | |
| 201 | |
| 202 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { | |
| 203 return WideToUTF8(src, src_len, output); | |
| 204 } | |
| 205 | |
| 206 std::string UTF16ToUTF8(const string16& utf16) { | |
| 207 return WideToUTF8(utf16); | |
| 208 } | |
| 209 | |
| 210 #endif | |
| 211 | |
| 212 string16 ASCIIToUTF16(const StringPiece& ascii) { | |
| 213 DCHECK(IsStringASCII(ascii)) << ascii; | |
| 214 return string16(ascii.begin(), ascii.end()); | |
| 215 } | |
| 216 | |
| 217 std::string UTF16ToASCII(const string16& utf16) { | |
| 218 DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); | |
| 219 return std::string(utf16.begin(), utf16.end()); | |
| 220 } | |
| 221 | |
| 222 } // namespace base | |
| OLD | NEW |