base/sys_string_conversions_linux.cc - Issue 149065: Implement Linux sys_string_conversions using the system APIs.

Unified Diff: base/sys_string_conversions_linux.cc

Issue 149065: Implement Linux sys_string_conversions using the system APIs. (Closed)

Patch Set: Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/sys_string_conversions_linux.cc

diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc

index 7661d8730b92705592f02fed2cc82054df0bc3a2..118f0ac48494e2048079457f1f6b094ab2f21b33 100644

--- a/base/sys_string_conversions_linux.cc

+++ b/base/sys_string_conversions_linux.cc

@@ -4,6 +4,8 @@

#include "base/sys_string_conversions.h"

+#include <wchar.h>

#include "base/string_piece.h"

#include "base/string_util.h"

@@ -23,13 +25,120 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) {

}

std::string SysWideToNativeMB(const std::wstring& wide) {

- // TODO(evanm): we can't assume Linux is UTF-8.

- return SysWideToUTF8(wide);

+ mbstate_t ps;

+ // Calculate the number of output bytes. We walk through the string

+ // without writing the output, counting the bytes each character needs.

+ size_t num_out_chars = 0;

+ memset(&ps, 0, sizeof(ps));

+ for (size_t i = 0; i < wide.size(); ++i) {

+ const wchar_t src = wide[i];

+ // Use a temp buffer since calling wcrtomb with an output of NULL does not

+ // calculate the output length.

+ char buf[16];

+ // Skip NULLs to avoid wcrtomb's special handling of them.

+ size_t res = src ? wcrtomb(buf, src, &ps) : 0;

+ switch (res) {

+ // Handle any errors and return an empty string.

+ case -1:

+ return std::string();

+ break;

+ case 0:

+ // We hit an embedded null byte, keep going.

+ ++num_out_chars;

+ break;

+ default:

+ num_out_chars += res;

+ break;

+ }

+ if (num_out_chars == 0)

+ return std::string();

+ std::string out;

+ out.resize(num_out_chars);

+ // We walk the input string again, with |i| tracking the index of the

+ // wide input, and |j| tracking the multi-byte output.

+ memset(&ps, 0, sizeof(ps));

+ for (size_t i = 0, j = 0; i < wide.size(); ++i) {

+ const wchar_t src = wide[i];

+ // We don't want wcrtomb to do its funkiness for embedded NULLs.

+ size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;

+ switch (res) {

+ // Handle any errors and return an empty string.

+ case -1:

+ return std::string();

+ break;

+ case 0:

+ // We hit an embedded null byte, keep going.

+ ++j; // Output is already zeroed.

+ break;

+ default:

+ j += res;

+ break;

+ }

+ return out;

}

std::wstring SysNativeMBToWide(const StringPiece& native_mb) {

- // TODO(evanm): we can't assume Linux is UTF-8.

- return SysUTF8ToWide(native_mb);

+ mbstate_t ps;

Evan Martin 2009/06/26 15:19:26 Style frowns upon abbreviated variable names. "mb

Dean McNamee 2009/06/26 15:23:07 Yeah, that's the style they give in the man page s

+ // Calculate the number of wide characters. We walk through the string

+ // without writing the output, counting the number of wide characters.

+ size_t num_out_chars = 0;

+ memset(&ps, 0, sizeof(ps));

+ for (size_t i = 0; i < native_mb.size(); ) {

+ const char* src = native_mb.data() + i;

+ size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);

+ switch (res) {

+ // Handle any errors and return an empty string.

+ case -2:

+ case -1:

+ return std::wstring();

+ break;

+ case 0:

+ // We hit an embedded null byte, keep going.

+ i += 1; // Fall through.

+ default:

+ i += res;

+ ++num_out_chars;

+ break;

+ }

+ if (num_out_chars == 0)

+ return std::wstring();

+ std::wstring out;

+ out.resize(num_out_chars);

+ memset(&ps, 0, sizeof(ps)); // Clear the shift state.

+ // We walk the input string again, with |i| tracking the index of the

+ // multi-byte input, and |j| tracking the wide output.

+ for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {

+ const char* src = native_mb.data() + i;

+ wchar_t* dst = &out[j];

+ size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);

+ switch (res) {

+ // Handle any errors and return an empty string.

+ case -2:

+ case -1:

+ return std::wstring();

+ break;

+ case 0:

+ i += 1; // Skip null byte.

+ break;

+ default:

+ i += res;

+ break;

+ }

+ return out;

}

} // namespace base

« no previous file with comments | « no previous file | base/sys_string_conversions_unittest.cc » ('j') | base/sys_string_conversions_unittest.cc » ('J')