Index: base/i18n/file_util_icu.cc |
=================================================================== |
--- base/i18n/file_util_icu.cc (revision 29772) |
+++ base/i18n/file_util_icu.cc (working copy) |
@@ -124,50 +124,47 @@ |
return Singleton<IllegalCharacters>()->containsNone(file_name); |
} |
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { |
+void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
+ char replace_char) { |
DCHECK(file_name); |
- DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) && |
- replace_char < 0x10000); |
+ DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); |
// Remove leading and trailing whitespace. |
TrimWhitespace(*file_name, TRIM_ALL, file_name); |
- if (IsFilenameLegal(WideToUTF16(*file_name))) |
- return; |
+ IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); |
+ int cursor = 0; // The ICU macros expect an int. |
+ while (cursor < static_cast<int>(file_name->size())) { |
+ int char_begin = cursor; |
+ uint32 code_point; |
+#if defined(OS_MACOSX) |
+ // Mac uses UTF-8 encoding for filenames. |
+ U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
+ code_point); |
+#elif defined(OS_WIN) |
+ // Windows uses UTF-16 encoding for filenames. |
+ U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
+ code_point); |
+#elif defined(OS_LINUX) |
+ // Linux doesn't actually define an encoding. It basically allows anything |
+ // except for a few special ASCII characters. |
+ unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); |
+ if (cur_char >= 0x80) |
+ continue; |
+ code_point = cur_char; |
+#else |
+ NOTREACHED(); |
+#endif |
- std::wstring::size_type i = 0; |
- std::wstring::size_type length = file_name->size(); |
- const wchar_t* wstr = file_name->data(); |
-#if defined(WCHAR_T_IS_UTF16) |
- // Using |span| method of UnicodeSet might speed things up a bit, but |
- // it's not likely to matter here. |
- std::wstring temp; |
- temp.reserve(length); |
- while (i < length) { |
- UChar32 ucs4; |
- std::wstring::size_type prev = i; |
- U16_NEXT(wstr, i, length, ucs4); |
- if (Singleton<IllegalCharacters>()->contains(ucs4)) { |
- temp.push_back(replace_char); |
- } else if (ucs4 < 0x10000) { |
- temp.push_back(ucs4); |
- } else { |
- temp.push_back(wstr[prev]); |
- temp.push_back(wstr[prev + 1]); |
+ if (illegal->contains(code_point)) { |
+ file_name->replace(char_begin, cursor - char_begin, 1, replace_char); |
+ // We just collapsed a potentially multi-byte/word character into a |
+ // single byte/word, so we need to move the cursor back to just past the |
+ // replacement character. |
+ cursor = char_begin + 1; |
} |
} |
- file_name->swap(temp); |
-#elif defined(WCHAR_T_IS_UTF32) |
- while (i < length) { |
- if (Singleton<IllegalCharacters>()->contains(wstr[i])) { |
- (*file_name)[i] = replace_char; |
- } |
- ++i; |
- } |
-#else |
-#error wchar_t* should be either UTF-16 or UTF-32 |
-#endif |
} |
bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |