base/sys_string_conversions_linux.cc - Issue 140062: Implement Linux sys_string_conversions using the system APIs.

Unified Diff: base/sys_string_conversions_linux.cc

Issue 140062: Implement Linux sys_string_conversions using the system APIs. (Closed)

Patch Set: Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/sys_string_conversions_linux.cc

diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc

index 7661d8730b92705592f02fed2cc82054df0bc3a2..ff1d3c33c9ab96e0f4489943e597b6f8c8b72dd5 100644

--- a/base/sys_string_conversions_linux.cc

+++ b/base/sys_string_conversions_linux.cc

@@ -4,6 +4,8 @@

#include "base/sys_string_conversions.h"

+#include <wchar.h>

#include "base/string_piece.h"

#include "base/string_util.h"

@@ -23,13 +25,116 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) {

}

// Converts |wide| to the multi-byte encoding of the current C locale using
// wcrtomb().  An embedded NUL wide character is preserved as a single 0 byte.
//
// Returns the converted string, or an empty string if |wide| is empty or
// contains a character that wcrtomb() cannot convert.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // wcrtomb() reports failure as (size_t)-1.  The result is a size_t
  // (unsigned), so this sentinel is the only "negative" value it can hold and
  // every other value is a byte count.
  const size_t kConversionError = static_cast<size_t>(-1);

  // Pass 1: calculate the number of output bytes.  We walk through the string
  // without keeping the output, only counting the bytes each character needs.
  mbstate_t ps = mbstate_t();  // Zeroed state == initial conversion state.
  size_t num_out_chars = 0;
  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];
    // Use a temp buffer since calling wcrtomb() with an output of NULL does
    // not calculate the output length: a NULL output makes it ignore |src| and
    // act as if it had been asked to convert L'\0'.
    // NOTE(review): 16 bytes is assumed to be enough for one character in any
    // locale (i.e. >= MB_CUR_MAX) -- confirm.
    char buf[16];
    // Skip NULs to avoid wcrtomb()'s special handling of them (it may emit a
    // shift sequence and resets |ps|); we want exactly one 0 byte instead.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;
    if (res == kConversionError)
      return std::string();
    // A result of 0 means an embedded NUL, which still occupies one byte.
    num_out_chars += res ? res : 1;
  }

  if (num_out_chars == 0)
    return std::string();

  // Pass 2: walk the input string again, with |i| tracking the index of the
  // wide input and |j| tracking the index of the multi-byte output.
  std::string out(num_out_chars, '\0');
  ps = mbstate_t();  // Restart from the initial conversion state.
  for (size_t i = 0, j = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];
    // Same NUL handling as in pass 1, so both passes agree on the byte count.
    const size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
    if (res == kConversionError)
      return std::string();
    // For an embedded NUL just step over one byte; |out| is already zeroed.
    j += res ? res : 1;
  }

  return out;
}

std::wstring SysNativeMBToWide(const StringPiece& native_mb) {

- // TODO(evanm): we can't assume Linux is UTF-8.

- return SysUTF8ToWide(native_mb);

+ mbstate_t ps;

+ // Calculate the number of wide characters. We walk through the string

+ // without writing the output, counting the number of wide characters.

+ size_t num_out_chars = 0;

+ memset(&ps, 0, sizeof(ps));

+ for (size_t i = 0; i < native_mb.size(); ) {

+ const char* src = native_mb.data() + i;

+ size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);

+ switch (res) {

+ // Handle any errors and return an empty string.

+ case -2:

+ case -1:

+ return std::wstring();

+ break;

+ case 0:

+ // We hit an embedded null byte, keep going.

+ i += 1; // Fall through.

+ default:

+ i += res;

+ ++num_out_chars;

+ break;

+ }

+ if (num_out_chars == 0)

+ return std::wstring();

+ std::wstring out;

+ out.resize(num_out_chars);

+ memset(&ps, 0, sizeof(ps)); // Clear the shift state.

+ // We walk the input string again, with |i| tracking the index of the

+ // multi-byte input, and |j| tracking the wide output.

+ for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {

+ const char* src = native_mb.data() + i;

+ wchar_t* dst = &out[j];

+ size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);

+ switch (res) {

+ // Handle any errors and return an empty string.

+ case -2:

+ case -1:

+ return std::wstring();

+ break;

+ case 0:

+ i += 1; // Skip null, fall through.

+ default:

+ i += res;

+ break;

+ }

+ return out;

}

} // namespace base

« no previous file with comments | « no previous file | base/sys_string_conversions_unittest.cc » ('j') | no next file with comments »