Index: base/sys_string_conversions_linux.cc |
diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc |
index 7661d8730b92705592f02fed2cc82054df0bc3a2..ff1d3c33c9ab96e0f4489943e597b6f8c8b72dd5 100644 |
--- a/base/sys_string_conversions_linux.cc |
+++ b/base/sys_string_conversions_linux.cc |
@@ -4,6 +4,8 @@ |
#include "base/sys_string_conversions.h" |
+#include <wchar.h> |
+ |
#include "base/string_piece.h" |
#include "base/string_util.h" |
@@ -23,13 +25,116 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) { |
} |
// Converts |wide| to the multi-byte encoding used by wcrtomb(), i.e. the
// encoding selected by the process's current LC_CTYPE locale.
//
// Embedded NUL wide characters are preserved as single NUL bytes.  Returns an
// empty string if |wide| is empty or if any character cannot be represented
// in the target encoding.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // wcrtomb() signals an unencodable character by returning (size_t)-1.
  // Compare against an explicitly converted constant rather than a bare -1
  // so there is no implicit signed/unsigned narrowing.
  const size_t kConversionError = static_cast<size_t>(-1);

  mbstate_t ps;
  memset(&ps, 0, sizeof(ps));  // Start from the initial shift state.

  std::string out;
  out.reserve(wide.size());  // Every wide character yields at least one byte.

  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];

    if (src == L'\0') {
      // Do not hand embedded NULs to wcrtomb(): for a null wide character it
      // may first emit a shift sequence that restores the initial state and
      // it then resets |ps|.  We only want the NUL copied through as a
      // single zero byte, with the conversion state left untouched.
      out.push_back('\0');
      continue;
    }

    // Convert into a scratch buffer and append exactly the bytes produced.
    // wcrtomb() writes at most MB_CUR_MAX bytes, which is bounded by
    // MB_LEN_MAX (16 with glibc).
    char buf[16];
    const size_t res = wcrtomb(buf, src, &ps);
    if (res == kConversionError)
      return std::string();

    out.append(buf, res);
  }

  return out;
}
std::wstring SysNativeMBToWide(const StringPiece& native_mb) { |
- // TODO(evanm): we can't assume Linux is UTF-8. |
- return SysUTF8ToWide(native_mb); |
+ mbstate_t ps; |
+ |
+ // Calculate the number of wide characters. We walk through the string |
+ // without writing the output, counting the number of wide characters. |
+ size_t num_out_chars = 0; |
+ memset(&ps, 0, sizeof(ps)); |
+ for (size_t i = 0; i < native_mb.size(); ) { |
+ const char* src = native_mb.data() + i; |
+ size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); |
+ switch (res) { |
+ // Handle any errors and return an empty string. |
+ case -2: |
+ case -1: |
+ return std::wstring(); |
+ break; |
+ case 0: |
+ // We hit an embedded null byte, keep going. |
+ i += 1; // Fall through. |
+ default: |
+ i += res; |
+ ++num_out_chars; |
+ break; |
+ } |
+ } |
+ |
+ if (num_out_chars == 0) |
+ return std::wstring(); |
+ |
+ std::wstring out; |
+ out.resize(num_out_chars); |
+ |
+ memset(&ps, 0, sizeof(ps)); // Clear the shift state. |
+ // We walk the input string again, with |i| tracking the index of the |
+ // multi-byte input, and |j| tracking the wide output. |
+ for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { |
+ const char* src = native_mb.data() + i; |
+ wchar_t* dst = &out[j]; |
+ size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); |
+ switch (res) { |
+ // Handle any errors and return an empty string. |
+ case -2: |
+ case -1: |
+ return std::wstring(); |
+ break; |
+ case 0: |
+ i += 1; // Skip null, fall through. |
+ default: |
+ i += res; |
+ break; |
+ } |
+ } |
+ |
+ return out; |
} |
} // namespace base |