runtime/bin/utils_win.cc - Issue 1194883002: Improve the encoding/decoding to/from system encoding on Windows

Unified Diff: runtime/bin/utils_win.cc

Issue 1194883002: Improve the encoding/decoding to/from system encoding on Windows (Closed) Base URL: https://github.com/dart-lang/sdk.git@master

Patch Set: A few more comments Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: runtime/bin/utils_win.cc

diff --git a/runtime/bin/utils_win.cc b/runtime/bin/utils_win.cc

index dab73552daabeb9ddaae673b017785c65b4ffe68..4f87f9e8301207397a1c01118de01884e88de4c8 100644

--- a/runtime/bin/utils_win.cc

+++ b/runtime/bin/utils_win.cc

@@ -61,58 +61,85 @@ void OSError::SetCodeAndMessage(SubSystem sub_system, int code) {

free(utf8);

}

-char* StringUtils::ConsoleStringToUtf8(char* str) {

- int len = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);

- wchar_t* unicode = new wchar_t[len+1];

- MultiByteToWideChar(CP_ACP, 0, str, -1, unicode, len);

- unicode[len] = '\0';

- char* utf8 = StringUtils::WideToUtf8(unicode);

- delete[] unicode;

+char* StringUtils::ConsoleStringToUtf8(char* str,

+ intptr_t len,

+ intptr_t* result_len) {

+ int wide_len = MultiByteToWideChar(CP_ACP, 0, str, len, NULL, 0);

+ wchar_t* wide = new wchar_t[wide_len];

+ MultiByteToWideChar(CP_ACP, 0, str, len, wide, wide_len);

+ char* utf8 = StringUtils::WideToUtf8(wide, wide_len, result_len);

+ delete[] wide;

kustermann 2015/06/22 11:11:21 The extra copy is a bit unfortunate :-/

Lasse Reichstein Nielsen 2015/06/22 12:08:34 But probably unavoidable if we want to use the sys…

return utf8;

}

-char* StringUtils::Utf8ToConsoleString(char* utf8) {

- wchar_t* unicode = Utf8ToWide(utf8);

- int len = WideCharToMultiByte(CP_ACP, 0, unicode, -1, NULL, 0, NULL, NULL);

- char* ansi = reinterpret_cast<char*>(malloc(len + 1));

- WideCharToMultiByte(CP_ACP, 0, unicode, -1, ansi, len, NULL, NULL);

- ansi[len] = '\0';

- free(unicode);

+char* StringUtils::Utf8ToConsoleString(char* utf8,

+ intptr_t len,

+ intptr_t* result_len) {

+ intptr_t wide_len;

+ wchar_t* wide = Utf8ToWide(utf8, len, &wide_len);

kustermann 2015/06/22 11:11:21 Maybe qualify with StringUtils:: as you normally d…

Søren Gjesse 2015/06/23 11:17:59 Done.

+ int system_len = WideCharToMultiByte(

+ CP_ACP, 0, wide, wide_len, NULL, 0, NULL, NULL);

+ char* ansi = reinterpret_cast<char*>(malloc(system_len + 1));

kustermann 2015/06/22 11:11:21 Do we not want to handle allocation failures? Als…

Søren Gjesse 2015/06/23 11:17:59 Added a check here. However we don't handle failed…

+ WideCharToMultiByte(CP_ACP, 0, wide, wide_len, ansi, system_len, NULL, NULL);

+ ansi[system_len] = '\0';

Lasse Reichstein Nielsen 2015/06/22 12:08:34 Should this only be set when result_len is NULL?

Søren Gjesse 2015/06/23 11:17:59 I removed this, as when len is -1 the NUL char is…

+ free(wide);

+ if (result_len != NULL) {

+ *result_len = system_len;

+ }

return ansi;

}

-char* StringUtils::WideToUtf8(wchar_t* wide) {

- int len = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);

- char* utf8 = reinterpret_cast<char*>(malloc(len + 1));

- WideCharToMultiByte(CP_UTF8, 0, wide, -1, utf8, len, NULL, NULL);

- utf8[len] = '\0';

+char* StringUtils::WideToUtf8(wchar_t* wide,

+ intptr_t len,

+ intptr_t* result_len) {

+ int utf8_len = WideCharToMultiByte(

+ CP_UTF8, 0, wide, len, NULL, 0, NULL, NULL);

+ char* utf8 = reinterpret_cast<char*>(malloc(utf8_len + 1));

+ WideCharToMultiByte(CP_UTF8, 0, wide, len, utf8, utf8_len, NULL, NULL);

+ utf8[utf8_len] = '\0';

Lasse Reichstein Nielsen 2015/06/22 12:08:34 Again, only include the extra NUL if result_len is…

Søren Gjesse 2015/06/23 11:17:59 Removed, see above.

+ if (result_len != NULL) {

+ *result_len = utf8_len;

+ }

return utf8;

}

-wchar_t* StringUtils::Utf8ToWide(char* utf8) {

- int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);

- wchar_t* unicode =

- reinterpret_cast<wchar_t*>(malloc((len + 1) * sizeof(wchar_t)));

- MultiByteToWideChar(CP_UTF8, 0, utf8, -1, unicode, len);

- unicode[len] = '\0';

- return unicode;

+wchar_t* StringUtils::Utf8ToWide(char* utf8,

+ intptr_t len,

+ intptr_t* result_len) {

+ int wide_len = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0);

+ wchar_t* wide =

+ reinterpret_cast<wchar_t*>(malloc((wide_len + 1) * sizeof(wchar_t)));

kustermann 2015/06/22 11:11:21 The distinction between the number-of-bytes/number…

Lasse Reichstein Nielsen 2015/06/22 12:08:34 Sadly because the function is documented as return…

+ MultiByteToWideChar(CP_UTF8, 0, utf8, len, wide, wide_len);

+ wide[wide_len] = '\0';

kustermann 2015/06/22 11:11:21 Is this common to 0-terminate a 2-byte wide string…

Søren Gjesse 2015/06/23 11:17:59 It is, all the w versions of string functions on W…

+ if (result_len != NULL) {

+ *result_len = wide_len;

+ }

+ return wide;

}

-const char* StringUtils::Utf8ToConsoleString(const char* utf8) {

- return const_cast<const char*>(Utf8ToConsoleString(const_cast<char*>(utf8)));

+const char* StringUtils::Utf8ToConsoleString(

+ const char* utf8, intptr_t len, intptr_t* result_len) {

+ return const_cast<const char*>(

+ Utf8ToConsoleString(const_cast<char*>(utf8), len, result_len));

}

-const char* StringUtils::ConsoleStringToUtf8(const char* str) {

- return const_cast<const char*>(ConsoleStringToUtf8(const_cast<char*>(str)));

+const char* StringUtils::ConsoleStringToUtf8(

+ const char* str, intptr_t len, intptr_t* result_len) {

+ return const_cast<const char*>(

+ ConsoleStringToUtf8(const_cast<char*>(str), len, result_len));

}

-const char* StringUtils::WideToUtf8(const wchar_t* wide) {

- return const_cast<const char*>(WideToUtf8(const_cast<wchar_t*>(wide)));

+const char* StringUtils::WideToUtf8(

+ const wchar_t* wide, intptr_t len, intptr_t* result_len) {

+ return const_cast<const char*>(

+ WideToUtf8(const_cast<wchar_t*>(wide), len, result_len));

}

-const wchar_t* StringUtils::Utf8ToWide(const char* utf8) {

- return const_cast<const wchar_t*>(Utf8ToWide(const_cast<char*>(utf8)));

+const wchar_t* StringUtils::Utf8ToWide(

+ const char* utf8, intptr_t len, intptr_t* result_len) {

+ return const_cast<const wchar_t*>(

+ Utf8ToWide(const_cast<char*>(utf8), len, result_len));

}

wchar_t** ShellUtils::GetUnicodeArgv(int* argc) {

« runtime/bin/utils_android.cc ('K') | « runtime/bin/utils_macos.cc ('k') | tests/standalone/io/system_encoding_test.dart » ('j') | tests/standalone/io/system_encoding_test.dart » ('J')