chrome/install_static/install_util.cc - Issue 2543503003: Handle spaces and quotes in chrome_elf command line parser

Unified Diff: chrome/install_static/install_util.cc

Issue 2543503003: Handle spaces and quotes in chrome_elf command line parser (Closed)

Patch Set: use wcs* Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « chrome/install_static/install_util.h ('k') | chrome/install_static/install_util_unittest.cc » ('j') | chrome/install_static/install_util_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/install_static/install_util.cc

diff --git a/chrome/install_static/install_util.cc b/chrome/install_static/install_util.cc

index fcbcb4ffbd6e2100d93ba307234147bd61e1ca38..8e4b4fceff4522dd65a281cbeed920f7485ccbf5 100644

--- a/chrome/install_static/install_util.cc

+++ b/chrome/install_static/install_util.cc

@@ -581,46 +581,106 @@ std::vector<std::wstring> TokenizeString16(const std::wstring& str,

return TokenizeStringT<std::wstring>(str, delimiter, trim_spaces);

}

+std::vector<std::wstring> TokenizeCommandLineToArray(

+ const std::wstring& command_line) {

+ // This is baroquely complex to do properly, see e.g.

+ // https://blogs.msdn.microsoft.com/oldnewthing/20100917-00/?p=12833

+ // http://www.windowsinspired.com/how-a-windows-programs-splits-its-command-line-into-individual-arguments/

+ // and many others. We cannot use CommandLineToArgvW() in chrome_elf, because

+ // it's in shell32.dll. Previously, __wgetmainargs() in the CRT was available,

+ // and it's still documented for VS 2015 at

+ // https://msdn.microsoft.com/en-us/library/ff770599.aspx but unfortunately,

+ // isn't actually available.

+ //

+ // This parsing matches CommandLineToArgvW()s for arguments, rather than the

+ // CRTs. These are different only in the most obscure of cases and will not

+ // matter in any practical situation. See the windowsinspired.com post above

+ // for details.

+ //

+ // TODO(scottmg): argv[0] normally has special handling. We should likely do

+ // that.

+ enum class SpecialChars {

grt (UTC plus 2) 2016/12/01 09:07:13 i think some comments in here are worthwhile. i've

scottmg 2016/12/01 19:04:21 Done.

+ kInterpret,

grt (UTC plus 2) 2016/12/01 09:07:13 // Space or tab, if encountered, delimit a break b

scottmg 2016/12/01 19:04:21 Done.

+ kIgnore,

grt (UTC plus 2) 2016/12/01 09:07:12 // Space or tab, if encountered, are part of the c

scottmg 2016/12/01 19:04:21 Done.

+ } state;

+ std::vector<std::wstring> result;

+ std::wstring token;

+ const wchar_t* p = command_line.c_str();

+ // This loops the entire string, with a subloop for each argument.

+ for (;;) {

+ // Advance past leading whitespace (only space and tab are handled).

+ p += wcsspn(p, L" \t");

+ // End of arguments.

+ if (p[0] == 0) {

+ if (!token.empty())

+ result.push_back(token);

+ break;

+ }

+ state = SpecialChars::kInterpret;

+ // Scan an argument.

+ for (;;) {

+ // Count and advance past leading backslashes.

grt (UTC plus 2) 2016/12/01 09:07:12 nit: since this counts all backslashes, not only l

scottmg 2016/12/01 19:04:21 Done.

+ int num_backslashes = 0;

+ while (p[0] == L'\\') {

+ ++p;

+ ++num_backslashes;

+ }

+ if (p[0] == L'"') {

+ for (int i = 1; i < num_backslashes; i += 2)

grt (UTC plus 2) 2016/12/01 09:07:13 tiny nit: a single append will be more performant

scottmg 2016/12/01 19:04:22 Done. (num_backslashes / 2) though.

grt (UTC plus 2) 2016/12/02 10:18:03 Oops. Cool.

+ token += L'\\';

+ if (num_backslashes % 2 == 1) {

+ token += L'"';

grt (UTC plus 2) 2016/12/01 09:07:13 // An odd number of backslashes followed by a quot

scottmg 2016/12/01 19:04:21 Done.

+ } else if (p[1] == L'"' && state == SpecialChars::kIgnore) {

grt (UTC plus 2) 2016/12/01 09:07:13 will this not be an OOB read for a string that end

scottmg 2016/12/01 19:04:21 I think it's safe, because .c_str() is null termin

+ ++p;

grt (UTC plus 2) 2016/12/01 09:07:12 // Special case for consecutive double quotes whil

scottmg 2016/12/01 19:04:22 Done.

+ token += L'"';

+ state = SpecialChars::kInterpret;

+ } else {

+ state = state == SpecialChars::kInterpret ? SpecialChars::kIgnore

+ : SpecialChars::kInterpret;

+ }

+ } else {

+ for (int i = 0; i < num_backslashes; ++i)

grt (UTC plus 2) 2016/12/01 09:07:13 // Emit backslashes that do not precede a double q

scottmg 2016/12/01 19:04:22 Done.

+ token += L'\\';

+ if (p[0] == 0 ||

+ (wcschr(L" \t", p[0]) && state == SpecialChars::kInterpret)) {

grt (UTC plus 2) 2016/12/01 09:07:12 nit: swap these two so that the cheap condition is

scottmg 2016/12/01 19:04:21 Done.

+ result.push_back(token);

+ token.clear();

+ break;

+ } else {

grt (UTC plus 2) 2016/12/01 09:07:12 nit: omit "else" since the "if" branch breaks for

scottmg 2016/12/01 19:04:21 Done.

+ token += *p;

+ }

+ ++p;

+ }

+ return result;

std::wstring GetSwitchValueFromCommandLine(const std::wstring& command_line,

const std::wstring& switch_name) {

assert(!command_line.empty());

assert(!switch_name.empty());

- std::wstring command_line_copy = command_line;

- // Remove leading and trailing spaces.

- TrimT<std::wstring>(&command_line_copy);

- // Find the switch in the command line. If we don't find the switch, return

- // an empty string.

- std::wstring switch_token = L"--";

- switch_token += switch_name;

- switch_token += L"=";

- size_t switch_offset = command_line_copy.find(switch_token);

- if (switch_offset == std::string::npos)

- return std::wstring();

- // The format is "--<switch name>=blah". Look for a space after the

- // "--<switch name>=" string. If we don't find a space assume that the switch

- // value ends at the end of the command line.

- size_t switch_value_start_offset = switch_offset + switch_token.length();

- if (std::wstring(kWhiteSpaces16).find(

- command_line_copy[switch_value_start_offset]) != std::wstring::npos) {

- switch_value_start_offset = command_line_copy.find_first_not_of(

- GetWhiteSpacesForType<std::wstring>(), switch_value_start_offset);

- if (switch_value_start_offset == std::wstring::npos)

- return std::wstring();

+ std::vector<std::wstring> as_array = TokenizeCommandLineToArray(command_line);

+ std::wstring switch_with_equal = L"--" + switch_name + L"=";

+ for (size_t i = 1; i < as_array.size(); ++i) {

grt (UTC plus 2) 2016/12/01 09:07:12 nit for (const std::wstring& arg : as_array) {

scottmg 2016/12/01 19:04:22 I'd prefer that, but is there an easy way to do th

grt (UTC plus 2) 2016/12/02 10:18:03 oh, 1, duh. apologies

+ const std::wstring& arg = as_array[i];

+ if (arg.compare(0, switch_with_equal.size(), switch_with_equal) == 0) {

grt (UTC plus 2) 2016/12/01 09:07:13 nit: omit braces

scottmg 2016/12/01 19:04:22 Heh, done. I forgot to switch from mark@ back to g

+ return arg.substr(switch_with_equal.size());

+ }

}

- size_t switch_value_end_offset =

- command_line_copy.find_first_of(GetWhiteSpacesForType<std::wstring>(),

- switch_value_start_offset);

- if (switch_value_end_offset == std::wstring::npos)

- switch_value_end_offset = command_line_copy.length();

- std::wstring switch_value = command_line_copy.substr(

- switch_value_start_offset,

- switch_value_end_offset - (switch_offset + switch_token.length()));

- TrimT<std::wstring>(&switch_value);

- return switch_value;

+ return std::wstring();

}

bool RecursiveDirectoryCreate(const std::wstring& full_path) {

« no previous file with comments | « chrome/install_static/install_util.h ('k') | chrome/install_static/install_util_unittest.cc » ('j') | chrome/install_static/install_util_unittest.cc » ('J')