base/sys_string_conversions_linux.cc - Issue 140062: Implement Linux sys_string_conversions using the system APIs.

Side by Side Diff: base/sys_string_conversions_linux.cc

Issue 140062: Implement Linux sys_string_conversions using the system APIs. (Closed)

Patch Set: Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/sys_string_conversions.h"	5 #include "base/sys_string_conversions.h"

6	6

	7 #include <wchar.h>

	8

7 #include "base/string_piece.h"	9 #include "base/string_piece.h"

8 #include "base/string_util.h"	10 #include "base/string_util.h"

9	11

10 namespace base {	12 namespace base {

11	13

12 std::string SysWideToUTF8(const std::wstring& wide) {	14 std::string SysWideToUTF8(const std::wstring& wide) {

13 // In theory this should be using the system-provided conversion rather	15 // In theory this should be using the system-provided conversion rather

14 // than our ICU, but this will do for now.	16 // than our ICU, but this will do for now.

15 return WideToUTF8(wide);	17 return WideToUTF8(wide);

16 }	18 }

17 std::wstring SysUTF8ToWide(const StringPiece& utf8) {	19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {

18 // In theory this should be using the system-provided conversion rather	20 // In theory this should be using the system-provided conversion rather

19 // than our ICU, but this will do for now.	21 // than our ICU, but this will do for now.

20 std::wstring out;	22 std::wstring out;

21 UTF8ToWide(utf8.data(), utf8.size(), &out);	23 UTF8ToWide(utf8.data(), utf8.size(), &out);

22 return out;	24 return out;

23 }	25 }

24	26

25 std::string SysWideToNativeMB(const std::wstring& wide) {	27 std::string SysWideToNativeMB(const std::wstring& wide) {

26 // TODO(evanm): we can't assume Linux is UTF-8.	28 mbstate_t ps;

27 return SysWideToUTF8(wide);	29

	30 // Calculate the number of multi-byte characters. We walk through the string

	31 // without writing the output, counting the number of multi-byte characters.

	32 size_t num_out_chars = 0;

	33 memset(&ps, 0, sizeof(ps));

	34 for (size_t i = 0; i < wide.size(); ++i) {

	35 const wchar_t src = wide[i];

	36 // Use a temp buf since a output of NULL does not do what we want.
	Evan Martin 2009/06/22 15:22:56
	s/a/an/
	This comment doesn't really explain anything (where does "output of NULL" come from?)
	Dean McNamee 2009/06/22 15:26:02
	On 2009/06/22 15:22:56, Evan Martin wrote:
	> s/a/an/
	> This comment doesn't really explain anything (where does "output of NULL" come
	> from?)
	Ok, I'll expand on it. Basically: "A third case is when s is NULL. In this case wc is ignored, and the function effectively returns wcrtomb(buf, L'\0', ps) where buf is an internal anonymous buffer." That is not the behavior we want at all. So we need to do the write to a temporary buffer and just ignore the output, so we can get the size value correct.
	37 char buf[16];

	38 // We don't want wcrtomb to do it's funkiness for embedded NULLs.
	Evan Martin 2009/06/22 15:22:56
	s/it's/its/
	See above request for a descriptive comment.
	39 size_t res = src ? wcrtomb(buf, src, &ps) : 0;

	40 switch (res) {

	41 // Handle any errors and return an empty string.

	42 case -1:

	43 return std::string();

	44 break;

	45 case 0:

	46 // We hit an embedded null byte, keep going.

	47 ++num_out_chars;

	48 default:

	49 num_out_chars += res;
	Evan Martin 2009/06/22 15:22:56
	Is it correct to do this if res is negative but not -1?
	I think more common for this kind of API is
	  if (res < 0) {
	    // error handling, bailing out
	  }
	  if (res == 0) {
	    special case
	  } else {
	    normal case
	  }
	Dean McNamee 2009/06/22 15:26:02
	On 2009/06/22 15:22:56, Evan Martin wrote:
	> Is it correct to do this if res is negative but not -1?
	> I think more common for this kind of API is
	>
	> if (res < 0) {
	> // error handling, bailing out
	> }
	res is a size_t (unsigned).
	>
	> if (res == 0) {
	> special case
	> } else {
	> normal case
	> }
	50 break;

	51 }

	52 }

	53

	54 if (num_out_chars == 0)

	55 return std::string();

	56

	57 std::string out;

	58 out.resize(num_out_chars);

	59

	60 // We walk the input string again, with \|i\| tracking the index of the

	61 // wide input, and \|j\| tracking the multi-byte output.

	62 memset(&ps, 0, sizeof(ps));

	63 for (size_t i = 0, j = 0; i < wide.size(); ++i) {

	64 const wchar_t src = wide[i];

	65 // We don't want wcrtomb to do it's funkiness for embedded NULLs.

	66 size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;

	67 switch (res) {

	68 // Handle any errors and return an empty string.

	69 case -1:

	70 return std::string();

	71 break;

	72 case 0:

	73 // We hit an embedded null byte, keep going.

	74 ++j; // Output is already 0.

	75 default:

	76 j += res;

	77 break;

	78 }

	79 }

	80

	81 return out;

28 }	82 }

29	83

30 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {	84 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {

31 // TODO(evanm): we can't assume Linux is UTF-8.	85 mbstate_t ps;

32 return SysUTF8ToWide(native_mb);	86

	87 // Calculate the number of wide characters. We walk through the string

	88 // without writing the output, counting the number of wide characters.

	89 size_t num_out_chars = 0;

	90 memset(&ps, 0, sizeof(ps));

	91 for (size_t i = 0; i < native_mb.size(); ) {

	92 const char* src = native_mb.data() + i;

	93 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);

	94 switch (res) {

	95 // Handle any errors and return an empty string.

	96 case -2:

	97 case -1:

	98 return std::wstring();

	99 break;

	100 case 0:

	101 // We hit an embedded null byte, keep going.

	102 i += 1; // Fall through.

	103 default:

	104 i += res;

	105 ++num_out_chars;

	106 break;

	107 }

	108 }

	109

	110 if (num_out_chars == 0)

	111 return std::wstring();

	112

	113 std::wstring out;

	114 out.resize(num_out_chars);

	115

	116 memset(&ps, 0, sizeof(ps)); // Clear the shift state.

	117 // We walk the input string again, with \|i\| tracking the index of the

	118 // multi-byte input, and \|j\| tracking the wide output.

	119 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {

	120 const char* src = native_mb.data() + i;

	121 wchar_t* dst = &out[j];

	122 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);

	123 switch (res) {

	124 // Handle any errors and return an empty string.

	125 case -2:

	126 case -1:

	127 return std::wstring();

	128 break;

	129 case 0:

	130 i += 1; // Skip null, fall through.

	131 default:

	132 i += res;

	133 break;

	134 }

	135 }

	136

	137 return out;

33 }	138 }

34	139

35 } // namespace base	140 } // namespace base

OLD	NEW

« no previous file with comments | « no previous file | base/sys_string_conversions_unittest.cc » ('j') | no next file with comments »