| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
| 6 | 6 |
| 7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
| 8 | 8 |
| 9 #include "base/files/file_path.h" | 9 #include "base/files/file_path.h" |
| 10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
| (...skipping 12 matching lines...) Expand all Loading... |
| 23 namespace i18n { | 23 namespace i18n { |
| 24 | 24 |
| 25 namespace { | 25 namespace { |
| 26 | 26 |
| 27 class IllegalCharacters { | 27 class IllegalCharacters { |
| 28 public: | 28 public: |
| 29 static IllegalCharacters* GetInstance() { | 29 static IllegalCharacters* GetInstance() { |
| 30 return Singleton<IllegalCharacters>::get(); | 30 return Singleton<IllegalCharacters>::get(); |
| 31 } | 31 } |
| 32 | 32 |
| 33 bool contains(UChar32 ucs4) { | 33 bool DisallowedEverywhere(UChar32 ucs4) { |
| 34 return !!set->contains(ucs4); | 34 return !!illegal_anywhere_->contains(ucs4); |
| 35 } | 35 } |
| 36 | 36 |
| 37 bool containsNone(const string16 &s) { | 37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) { |
| 38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 38 return !!illegal_at_ends_->contains(ucs4); |
| 39 } |
| 40 |
| 41 bool IsAllowedName(const string16& s) { |
| 42 return s.empty() || (!!illegal_anywhere_->containsNone( |
| 43 icu::UnicodeString(s.c_str(), s.size())) && |
| 44 !illegal_at_ends_->contains(*s.begin()) && |
| 45 !illegal_at_ends_->contains(*s.rbegin())); |
| 39 } | 46 } |
| 40 | 47 |
| 41 private: | 48 private: |
| 42 friend class Singleton<IllegalCharacters>; | 49 friend class Singleton<IllegalCharacters>; |
| 43 friend struct DefaultSingletonTraits<IllegalCharacters>; | 50 friend struct DefaultSingletonTraits<IllegalCharacters>; |
| 44 | 51 |
| 45 IllegalCharacters(); | 52 IllegalCharacters(); |
| 46 ~IllegalCharacters() { } | 53 ~IllegalCharacters() { } |
| 47 | 54 |
| 48 scoped_ptr<icu::UnicodeSet> set; | 55 // set of characters considered invalid anywhere inside a filename. |
| 56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_; |
| 57 |
| 58 // set of characters considered invalid at either end of a filename. |
| 59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_; |
| 49 | 60 |
| 50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); | 61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); |
| 51 }; | 62 }; |
| 52 | 63 |
| 53 IllegalCharacters::IllegalCharacters() { | 64 IllegalCharacters::IllegalCharacters() { |
| 54 UErrorCode status = U_ZERO_ERROR; | 65 UErrorCode everywhere_status = U_ZERO_ERROR; |
| 55 // Control characters, formatting characters, non-characters, and | 66 UErrorCode ends_status = U_ZERO_ERROR; |
| 56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | 67 // Control characters, formatting characters, non-characters, path separators, |
| 68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). |
| 57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx | 69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx |
| 58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx | 70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx |
| 59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they | 71 // Note that code points in the "Other, Format" (Cf) category are ignored on |
| 60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used | 72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being |
| 61 // elsewhere, they can be confusing/problematic. | 73 // legitimate in Arabic and some S/SE Asian scripts. In addition, tilde (~) is |
| 62 // Also, consider wrapping the set with our Singleton class to create and | 74 // disallowed due to the possibility of interacting poorly with short |
| 63 // freeze it only once. Note that there's a trade-off between memory and | 75 // filenames on VFAT. (Related to CVE-2014-9390) |
| 64 // speed. | 76 illegal_anywhere_.reset(new icu::UnicodeSet( |
| 65 #if defined(WCHAR_T_IS_UTF16) | 77 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), |
| 66 set.reset(new icu::UnicodeSet(icu::UnicodeString( | 78 everywhere_status)); |
| 67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); | 79 illegal_at_ends_.reset(new icu::UnicodeSet( |
| 68 #else | 80 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); |
| 69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( | 81 DCHECK(U_SUCCESS(everywhere_status)); |
| 70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), | 82 DCHECK(U_SUCCESS(ends_status)); |
| 71 status)); | 83 |
| 72 #endif | |
| 73 DCHECK(U_SUCCESS(status)); | |
| 74 // Add non-characters. If this becomes a performance bottleneck by | 84 // Add non-characters. If this becomes a performance bottleneck by |
| 75 // any chance, do not add these to |set| and change IsFilenameLegal() | 85 // any chance, do not add these to |set| and change IsFilenameLegal() |
| 76 // to check |(ucs4 & 0xFFFEu) == 0xFFFEu|, in addition to calling | 86 // to check |(ucs4 & 0xFFFEu) == 0xFFFEu|, in addition to calling |
| 77 // containsNone(). | 87 // IsAllowedName(). |
| 78 set->add(0xFDD0, 0xFDEF); | 88 illegal_anywhere_->add(0xFDD0, 0xFDEF); |
| 79 for (int i = 0; i <= 0x10; ++i) { | 89 for (int i = 0; i <= 0x10; ++i) { |
| 80 int plane_base = 0x10000 * i; | 90 int plane_base = 0x10000 * i; |
| 81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 91 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
| 82 } | 92 } |
| 83 set->freeze(); | 93 illegal_anywhere_->freeze(); |
| 94 illegal_at_ends_->freeze(); |
| 84 } | 95 } |
| 85 | 96 |
| 86 } // namespace | 97 } // namespace |
| 87 | 98 |
| 88 bool IsFilenameLegal(const string16& file_name) { | 99 bool IsFilenameLegal(const string16& file_name) { |
| 89 return IllegalCharacters::GetInstance()->containsNone(file_name); | 100 return IllegalCharacters::GetInstance()->IsAllowedName(file_name); |
| 90 } | 101 } |
| 91 | 102 |
| 92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 103 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
| 93 char replace_char) { | 104 char replace_char) { |
| 94 DCHECK(file_name); | 105 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); |
| 95 | 106 |
| 96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); | 107 DCHECK(!(illegal->DisallowedEverywhere(replace_char))); |
| 108 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); |
| 97 | 109 |
| 98 // Remove leading and trailing whitespace. | |
| 99 TrimWhitespace(*file_name, TRIM_ALL, file_name); | |
| 100 | |
| 101 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); | |
| 102 int cursor = 0; // The ICU macros expect an int. | 110 int cursor = 0; // The ICU macros expect an int. |
| 103 while (cursor < static_cast<int>(file_name->size())) { | 111 while (cursor < static_cast<int>(file_name->size())) { |
| 104 int char_begin = cursor; | 112 int char_begin = cursor; |
| 105 uint32 code_point; | 113 uint32 code_point; |
| 106 #if defined(OS_MACOSX) | 114 #if defined(OS_MACOSX) |
| 107 // Mac uses UTF-8 encoding for filenames. | 115 // Mac uses UTF-8 encoding for filenames. |
| 108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 116 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 109 code_point); | 117 code_point); |
| 110 #elif defined(OS_WIN) | 118 #elif defined(OS_WIN) |
| 111 // Windows uses UTF-16 encoding for filenames. | 119 // Windows uses UTF-16 encoding for filenames. |
| 112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 120 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 113 code_point); | 121 code_point); |
| 114 #elif defined(OS_POSIX) | 122 #elif defined(OS_POSIX) |
| 115 // Linux doesn't actually define an encoding. It basically allows anything | 123 // Linux doesn't actually define an encoding. It basically allows anything |
| 116 // except for a few special ASCII characters. | 124 // except for a few special ASCII characters. |
| 117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); | 125 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); |
| 118 if (cur_char >= 0x80) | 126 if (cur_char >= 0x80) |
| 119 continue; | 127 continue; |
| 120 code_point = cur_char; | 128 code_point = cur_char; |
| 121 #else | 129 #else |
| 122 NOTREACHED(); | 130 NOTREACHED(); |
| 123 #endif | 131 #endif |
| 124 | 132 |
| 125 if (illegal->contains(code_point)) { | 133 if (illegal->DisallowedEverywhere(code_point) || |
| 134 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && |
| 135 illegal->DisallowedLeadingOrTrailing(code_point))) { |
| 126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); | 136 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); |
| 127 // We just made the potentially multi-byte/word char into one that only | 137 // We just made the potentially multi-byte/word char into one that only |
| 128 // takes one byte/word, so need to adjust the cursor to point to the next | 138 // takes one byte/word, so need to adjust the cursor to point to the next |
| 129 // character again. | 139 // character again. |
| 130 cursor = char_begin + 1; | 140 cursor = char_begin + 1; |
| 131 } | 141 } |
| 132 } | 142 } |
| 133 } | 143 } |
| 134 | 144 |
| 135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 145 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
| (...skipping 27 matching lines...) Expand all Loading... |
| 163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), | 173 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), |
| 164 kCodepageUTF8, | 174 kCodepageUTF8, |
| 165 &normalized_str)) { | 175 &normalized_str)) { |
| 166 *file_name = file_name->DirName().Append(FilePath(normalized_str)); | 176 *file_name = file_name->DirName().Append(FilePath(normalized_str)); |
| 167 } | 177 } |
| 168 #endif | 178 #endif |
| 169 } | 179 } |
| 170 | 180 |
| 171 } // namespace i18n | 181 } // namespace i18n |
| 172 } // namespace base | 182 } // namespace base |
| OLD | NEW |