base/i18n/file_util_icu.cc - Issue 271056: Do some cleanup of file path name handling.

Unified Diff: base/i18n/file_util_icu.cc

Issue 271056: Do some cleanup of file path name handling. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/i18n/file_util_icu.cc

===================================================================

--- base/i18n/file_util_icu.cc (revision 29772)

+++ base/i18n/file_util_icu.cc (working copy)

@@ -124,50 +124,47 @@

return Singleton<IllegalCharacters>()->containsNone(file_name);

}

-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {

+void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,

+ char replace_char) {

DCHECK(file_name);

- DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) &&

- replace_char < 0x10000);

+ DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)));

// Remove leading and trailing whitespace.

TrimWhitespace(*file_name, TRIM_ALL, file_name);

- if (IsFilenameLegal(WideToUTF16(*file_name)))

- return;

+ IllegalCharacters* illegal = Singleton<IllegalCharacters>::get();

+ int cursor = 0; // The ICU macros expect an int.

+ while (cursor < static_cast<int>(file_name->size())) {

+ int char_begin = cursor;

+ uint32 code_point;

+#if defined(OS_MACOSX)

+ // Mac uses UTF-8 encoding for filenames.

+ U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),

+ code_point);

+#elif defined(OS_WIN)

+ // Windows uses UTF-16 encoding for filenames.

+ U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),

+ code_point);

+#elif defined(OS_LINUX)

+ // Linux doesn't actually define an encoding. It basically allows anything

+ // except for a few special ASCII characters.

+ unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);

+ if (cur_char >= 0x80)

+ continue;

+ code_point = cur_char;

+#else

+ NOTREACHED();

+#endif

- std::wstring::size_type i = 0;

- std::wstring::size_type length = file_name->size();

- const wchar_t* wstr = file_name->data();

-#if defined(WCHAR_T_IS_UTF16)

- // Using |span| method of UnicodeSet might speed things up a bit, but

- // it's not likely to matter here.

- std::wstring temp;

- temp.reserve(length);

- while (i < length) {

- UChar32 ucs4;

- std::wstring::size_type prev = i;

- U16_NEXT(wstr, i, length, ucs4);

- if (Singleton<IllegalCharacters>()->contains(ucs4)) {

- temp.push_back(replace_char);

- } else if (ucs4 < 0x10000) {

- temp.push_back(ucs4);

- } else {

- temp.push_back(wstr[prev]);

- temp.push_back(wstr[prev + 1]);

+ if (illegal->contains(code_point)) {

+ file_name->replace(char_begin, cursor - char_begin, 1, replace_char);

+ // We just made the potentially multi-byte/word char into one that only

+ // takes one byte/word, so need to adjust the cursor to point to the next

+ // character again.

+ cursor = char_begin + 1;

}

- file_name->swap(temp);

-#elif defined(WCHAR_T_IS_UTF32)

- while (i < length) {

- if (Singleton<IllegalCharacters>()->contains(wstr[i])) {

- (*file_name)[i] = replace_char;

- }

- ++i;

- }

-#else

-#error wchar_t* should be either UTF-16 or UTF-32

-#endif

}

bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {

« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »