base/sys_string_conversions_linux.cc - Issue 149065: Implement Linux sys_string_conversions using the system APIs.

Side by Side Diff: base/sys_string_conversions_linux.cc

Issue 149065: Implement Linux sys_string_conversions using the system APIs. (Closed)

Patch Set: Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/sys_string_conversions.h"	5 #include "base/sys_string_conversions.h"

6	6

	7 #include <wchar.h>

	8

7 #include "base/string_piece.h"	9 #include "base/string_piece.h"

8 #include "base/string_util.h"	10 #include "base/string_util.h"

9	11

10 namespace base {	12 namespace base {

11	13

12 std::string SysWideToUTF8(const std::wstring& wide) {	14 std::string SysWideToUTF8(const std::wstring& wide) {

13 // In theory this should be using the system-provided conversion rather	15 // In theory this should be using the system-provided conversion rather

14 // than our ICU, but this will do for now.	16 // than our ICU, but this will do for now.

15 return WideToUTF8(wide);	17 return WideToUTF8(wide);

16 }	18 }

17 std::wstring SysUTF8ToWide(const StringPiece& utf8) {	19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {

18 // In theory this should be using the system-provided conversion rather	20 // In theory this should be using the system-provided conversion rather

19 // than our ICU, but this will do for now.	21 // than our ICU, but this will do for now.

20 std::wstring out;	22 std::wstring out;

21 UTF8ToWide(utf8.data(), utf8.size(), &out);	23 UTF8ToWide(utf8.data(), utf8.size(), &out);

22 return out;	24 return out;

23 }	25 }

24	26

25 std::string SysWideToNativeMB(const std::wstring& wide) {	27 std::string SysWideToNativeMB(const std::wstring& wide) {

26 // TODO(evanm): we can't assume Linux is UTF-8.	28 mbstate_t ps;

27 return SysWideToUTF8(wide);	29

	30 // Calculate the number of multi-byte characters. We walk through the string

	31 // without writing the output, counting the number of multi-byte characters.

	32 size_t num_out_chars = 0;

	33 memset(&ps, 0, sizeof(ps));

	34 for (size_t i = 0; i < wide.size(); ++i) {

	35 const wchar_t src = wide[i];

	36 // Use a temp buffer since calling wcrtomb with an output of NULL does not

	37 // calculate the output length.

	38 char buf[16];

	39 // Skip NULLs to avoid wcrtomb's special handling of them.

	40 size_t res = src ? wcrtomb(buf, src, &ps) : 0;

	41 switch (res) {

	42 // Handle any errors and return an empty string.

	43 case -1:

	44 return std::string();

	45 break;

	46 case 0:

	47 // We hit an embedded null byte, keep going.

	48 ++num_out_chars;

	49 break;

	50 default:

	51 num_out_chars += res;

	52 break;

	53 }

	54 }

	55

	56 if (num_out_chars == 0)

	57 return std::string();

	58

	59 std::string out;

	60 out.resize(num_out_chars);

	61

	62 // We walk the input string again, with |i| tracking the index of the

	63 // wide input, and |j| tracking the multi-byte output.

	64 memset(&ps, 0, sizeof(ps));

	65 for (size_t i = 0, j = 0; i < wide.size(); ++i) {

	66 const wchar_t src = wide[i];

	67 // We don't want wcrtomb to do it's funkiness for embedded NULLs.

	68 size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;

	69 switch (res) {

	70 // Handle any errors and return an empty string.

	71 case -1:

	72 return std::string();

	73 break;

	74 case 0:

	75 // We hit an embedded null byte, keep going.

	76 ++j; // Output is already zeroed.

	77 break;

	78 default:

	79 j += res;

	80 break;

	81 }

	82 }

	83

	84 return out;

28 }	85 }

29	86

30 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {	87 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {

31 // TODO(evanm): we can't assume Linux is UTF-8.	88 mbstate_t ps;
	Evan Martin 2009/06/26 15:19:26: Style frowns upon abbreviated variable names. "mbstate" woulda worked.
	Dean McNamee 2009/06/26 15:23:07: On 2009/06/26 15:19:26, Evan Martin wrote: > Style frowns upon abbreviated variable names. "mbstate" woulda worked. Yeah, that's the style they give in the man page so I just copied it. You're probably right though.
32 return SysUTF8ToWide(native_mb);	89

	90 // Calculate the number of wide characters. We walk through the string

	91 // without writing the output, counting the number of wide characters.

	92 size_t num_out_chars = 0;

	93 memset(&ps, 0, sizeof(ps));

	94 for (size_t i = 0; i < native_mb.size(); ) {

	95 const char* src = native_mb.data() + i;

	96 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);

	97 switch (res) {

	98 // Handle any errors and return an empty string.

	99 case -2:

	100 case -1:

	101 return std::wstring();

	102 break;

	103 case 0:

	104 // We hit an embedded null byte, keep going.

	105 i += 1; // Fall through.

	106 default:

	107 i += res;

	108 ++num_out_chars;

	109 break;

	110 }

	111 }

	112

	113 if (num_out_chars == 0)

	114 return std::wstring();

	115

	116 std::wstring out;

	117 out.resize(num_out_chars);

	118

	119 memset(&ps, 0, sizeof(ps)); // Clear the shift state.

	120 // We walk the input string again, with |i| tracking the index of the

	121 // multi-byte input, and |j| tracking the wide output.

	122 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {

	123 const char* src = native_mb.data() + i;

	124 wchar_t* dst = &out[j];

	125 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);

	126 switch (res) {

	127 // Handle any errors and return an empty string.

	128 case -2:

	129 case -1:

	130 return std::wstring();

	131 break;

	132 case 0:

	133 i += 1; // Skip null byte.

	134 break;

	135 default:

	136 i += res;

	137 break;

	138 }

	139 }

	140

	141 return out;

33 }	142 }

34	143

35 } // namespace base	144 } // namespace base

OLD	NEW

« no previous file with comments | « no previous file | base/sys_string_conversions_unittest.cc » ('j') | base/sys_string_conversions_unittest.cc » ('J')