OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "base/logging.h" |
| 6 #include "base/string_util.h" |
| 7 #include "googleurl/src/url_canon.h" |
| 8 |
| 9 #include <windows.h> |
| 10 |
| 11 //////////////////////////////////////////////////////////////////////////////// |
| 12 // Avoid dependency on string_util_icu.cc (which pulls in icu). |
| 13 |
| 14 std::string WideToAnsiDirect(const wchar_t* wide, size_t wide_len) { |
| 15 std::string ret; |
| 16 char* write = WriteInto(&ret, wide_len + 1); |
| 17 for (size_t i = 0; i < wide_len; ++i) { |
| 18 // We can only convert characters below 0x80 directly from wide to ansi. |
| 19 DCHECK(wide[i] <= 127) << "can't convert"; |
| 20 write[i] = static_cast<char>(wide[i]); |
| 21 } |
| 22 |
| 23 write[wide_len] = '\0'; |
| 24 |
| 25 return ret; |
| 26 } |
| 27 |
| 28 bool WideToUTF8(const wchar_t* wide, size_t wide_len, std::string* utf8) { |
| 29 DCHECK(utf8); |
| 30 |
| 31 // Add a cutoff. If it's all ASCII, convert it directly |
| 32 size_t i; |
| 33 for (i = 0; i < wide_len; ++i) { |
| 34 if (wide[i] > 127) |
| 35 break; |
| 36 } |
| 37 |
| 38 // If we made it to the end without breaking, then it's all ANSI, so do a |
| 39 // quick convert |
| 40 if (i == wide_len) { |
| 41 *utf8 = WideToAnsiDirect(wide, wide_len); |
| 42 return true; |
| 43 } |
| 44 |
| 45 // Figure out how long the string is |
| 46 int size = WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, NULL, 0, NULL, |
| 47 NULL); |
| 48 |
| 49 if (size > 0) { |
| 50 WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, WriteInto(utf8, size), |
| 51 size, NULL, NULL); |
| 52 } |
| 53 |
| 54 return (size > 0); |
| 55 } |
| 56 |
| 57 std::string WideToUTF8(const std::wstring& wide) { |
| 58 std::string ret; |
| 59 if (!wide.empty()) { |
| 60 // Ignore the success flag of this call, it will do the best it can for |
| 61 // invalid input, which is what we want here. |
| 62 WideToUTF8(wide.data(), wide.length(), &ret); |
| 63 } |
| 64 return ret; |
| 65 } |
| 66 |
| 67 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { |
| 68 DCHECK(output); |
| 69 |
| 70 if (src_len == 0) { |
| 71 output->clear(); |
| 72 return true; |
| 73 } |
| 74 |
| 75 int wide_chars = MultiByteToWideChar(CP_UTF8, 0, src, src_len, NULL, 0); |
| 76 if (!wide_chars) { |
| 77 NOTREACHED(); |
| 78 return false; |
| 79 } |
| 80 |
| 81 wide_chars++; // make room for L'\0' |
| 82 // Note that WriteInto will fill the string with '\0', so in the case |
| 83 // where the input string is not \0 terminated, we will still be ensured |
| 84 // that the output string will be. |
| 85 if (!MultiByteToWideChar(CP_UTF8, 0, src, src_len, |
| 86 WriteInto(output, wide_chars), wide_chars)) { |
| 87 NOTREACHED(); |
| 88 output->clear(); |
| 89 return false; |
| 90 } |
| 91 |
| 92 return true; |
| 93 } |
| 94 |
| 95 std::wstring UTF8ToWide(const base::StringPiece& utf8) { |
| 96 std::wstring ret; |
| 97 if (!utf8.empty()) |
| 98 UTF8ToWide(utf8.data(), utf8.length(), &ret); |
| 99 return ret; |
| 100 } |
| 101 |
| 102 #ifdef WCHAR_T_IS_UTF16 |
| 103 string16 UTF8ToUTF16(const std::string& utf8) { |
| 104 std::wstring ret; |
| 105 if (!utf8.empty()) |
| 106 UTF8ToWide(utf8.data(), utf8.length(), &ret); |
| 107 return ret; |
| 108 } |
| 109 #else |
| 110 #error Need WCHAR_T_IS_UTF16 |
| 111 #endif |
| 112 |
| 113 //////////////////////////////////////////////////////////////////////////////// |
| 114 // Replace ICU dependent functions in googleurl. |
| 115 /*#define __UTF_H__ |
| 116 #include "third_party/icu38/public/common/unicode/utf16.h" |
| 117 #define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) |
| 118 extern const char16 kUnicodeReplacementCharacter;*/ |
| 119 |
| 120 namespace url_canon { |
| 121 |
| 122 bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output) { |
| 123 // We should only hit this when the user attempts to navigate |
| 124 // CF to an invalid URL. |
| 125 DLOG(WARNING) << __FUNCTION__ << " not implemented"; |
| 126 return false; |
| 127 } |
| 128 |
| 129 bool ReadUTFChar(const char* str, int* begin, int length, |
| 130 unsigned* code_point_out) { |
| 131 // We should only hit this when the user attempts to navigate |
| 132 // CF to an invalid URL. |
| 133 DLOG(WARNING) << __FUNCTION__ << " not implemented"; |
| 134 |
| 135 // TODO(tommi): consider if we can use something like |
| 136 // http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
| 137 return false; |
| 138 } |
| 139 |
| 140 bool ReadUTFChar(const char16* str, int* begin, int length, |
| 141 unsigned* code_point) { |
| 142 /* |
| 143 if (U16_IS_SURROGATE(str[*begin])) { |
| 144 if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || |
| 145 !U16_IS_TRAIL(str[*begin + 1])) { |
| 146 // Invalid surrogate pair. |
| 147 *code_point = kUnicodeReplacementCharacter; |
| 148 return false; |
| 149 } else { |
| 150 // Valid surrogate pair. |
| 151 *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]); |
| 152 (*begin)++; |
| 153 } |
| 154 } else { |
| 155 // Not a surrogate, just one 16-bit word. |
| 156 *code_point = str[*begin]; |
| 157 } |
| 158 |
| 159 if (U_IS_UNICODE_CHAR(*code_point)) |
| 160 return true; |
| 161 |
| 162 // Invalid code point. |
| 163 *code_point = kUnicodeReplacementCharacter; |
| 164 return false;*/ |
| 165 CHECK(false); |
| 166 return false; |
| 167 } |
| 168 |
| 169 } // namespace url_canon |
OLD | NEW |