Chromium Code Reviews| Index: base/sys_string_conversions_linux.cc |
| diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc |
| index 7661d8730b92705592f02fed2cc82054df0bc3a2..118f0ac48494e2048079457f1f6b094ab2f21b33 100644 |
| --- a/base/sys_string_conversions_linux.cc |
| +++ b/base/sys_string_conversions_linux.cc |
| @@ -4,6 +4,8 @@ |
| #include "base/sys_string_conversions.h" |
| +#include <wchar.h> |
| + |
| #include "base/string_piece.h" |
| #include "base/string_util.h" |
| @@ -23,13 +25,120 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) { |
| } |
// Converts |wide| to the multi-byte encoding used by wcrtomb(), i.e. the
// encoding of the process's current C locale, one wide character at a time.
//
// Embedded NUL wide characters are preserved as a single zero byte in the
// output. If any character cannot be converted, the whole conversion is
// abandoned and an empty string is returned.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // wcrtomb() reports failure as (size_t)-1. Compare against an explicitly
  // converted constant: using a bare -1 as a case label for a size_t is a
  // narrowing conversion, which C++11 and later reject.
  const size_t kConversionError = static_cast<size_t>(-1);

  // |ps| mirrors the parameter name used by wcrtomb(). An all-zero state is
  // the initial conversion (shift) state.
  mbstate_t ps;
  memset(&ps, 0, sizeof(ps));

  std::string out;
  // Every wide character produces at least one output byte, so this is a
  // lower bound that avoids most reallocations.
  out.reserve(wide.size());

  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];
    if (!src) {
      // Don't hand NULs to wcrtomb, to avoid its special handling of them;
      // just emit a single zero byte and keep going.
      out.push_back('\0');
      continue;
    }

    // Scratch space for one converted character.
    // NOTE(review): 16 is assumed to be >= MB_LEN_MAX on the target platform
    // -- confirm if this is ever built for another libc.
    char buf[16];
    const size_t res = wcrtomb(buf, src, &ps);
    if (res == kConversionError)
      return std::string();  // Unrepresentable character: give up entirely.
    out.append(buf, res);
  }

  return out;
}
| std::wstring SysNativeMBToWide(const StringPiece& native_mb) { |
| - // TODO(evanm): we can't assume Linux is UTF-8. |
| - return SysUTF8ToWide(native_mb); |
| + mbstate_t ps; |
|
Evan Martin
2009/06/26 15:19:26
Style frowns upon abbreviated variable names. "mb
Dean McNamee
2009/06/26 15:23:07
Yeah, that's the style they give in the man page s
|
| + |
| + // Calculate the number of wide characters. We walk through the string |
| + // without writing the output, counting the number of wide characters. |
| + size_t num_out_chars = 0; |
| + memset(&ps, 0, sizeof(ps)); |
| + for (size_t i = 0; i < native_mb.size(); ) { |
| + const char* src = native_mb.data() + i; |
| + size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); |
| + switch (res) { |
| + // Handle any errors and return an empty string. |
| + case -2: |
| + case -1: |
| + return std::wstring(); |
| + break; |
| + case 0: |
| + // We hit an embedded null byte, keep going. |
| + i += 1; // Fall through. |
| + default: |
| + i += res; |
| + ++num_out_chars; |
| + break; |
| + } |
| + } |
| + |
| + if (num_out_chars == 0) |
| + return std::wstring(); |
| + |
| + std::wstring out; |
| + out.resize(num_out_chars); |
| + |
| + memset(&ps, 0, sizeof(ps)); // Clear the shift state. |
| + // We walk the input string again, with |i| tracking the index of the |
| + // multi-byte input, and |j| tracking the wide output. |
| + for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { |
| + const char* src = native_mb.data() + i; |
| + wchar_t* dst = &out[j]; |
| + size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); |
| + switch (res) { |
| + // Handle any errors and return an empty string. |
| + case -2: |
| + case -1: |
| + return std::wstring(); |
| + break; |
| + case 0: |
| + i += 1; // Skip null byte. |
| + break; |
| + default: |
| + i += res; |
| + break; |
| + } |
| + } |
| + |
| + return out; |
| } |
| } // namespace base |