Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
| 6 | 6 |
| 7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
| 8 | 8 |
| 9 #include "base/files/file_path.h" | 9 #include "base/files/file_path.h" |
| 10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 23 namespace i18n { | 23 namespace i18n { |
| 24 | 24 |
| 25 namespace { | 25 namespace { |
| 26 | 26 |
| 27 class IllegalCharacters { | 27 class IllegalCharacters { |
| 28 public: | 28 public: |
| 29 static IllegalCharacters* GetInstance() { | 29 static IllegalCharacters* GetInstance() { |
| 30 return Singleton<IllegalCharacters>::get(); | 30 return Singleton<IllegalCharacters>::get(); |
| 31 } | 31 } |
| 32 | 32 |
| 33 bool contains(UChar32 ucs4) { | 33 bool DisallowedEverywhere(UChar32 ucs4) { |
| 34 return !!set->contains(ucs4); | 34 return !!illegal_anywhere_->contains(ucs4); |
| 35 } | 35 } |
| 36 | 36 |
| 37 bool containsNone(const string16 &s) { | 37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) { |
| 38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 38 return !!illegal_at_ends_->contains(ucs4); |
| 39 } | |
| 40 | |
| 41 bool IsAllowedName(const string16& s) { | |
| 42 return s.empty() || (!!illegal_anywhere_->containsNone( | |
| 43 icu::UnicodeString(s.c_str(), s.size())) && | |
| 44 !illegal_at_ends_->contains(*s.begin()) && | |
| 45 !illegal_at_ends_->contains(*s.rbegin())); | |
| 39 } | 46 } |
| 40 | 47 |
| 41 private: | 48 private: |
| 42 friend class Singleton<IllegalCharacters>; | 49 friend class Singleton<IllegalCharacters>; |
| 43 friend struct DefaultSingletonTraits<IllegalCharacters>; | 50 friend struct DefaultSingletonTraits<IllegalCharacters>; |
| 44 | 51 |
| 45 IllegalCharacters(); | 52 IllegalCharacters(); |
| 46 ~IllegalCharacters() { } | 53 ~IllegalCharacters() { } |
| 47 | 54 |
| 48 scoped_ptr<icu::UnicodeSet> set; | 55 // set of characters considered invalid anywhere inside a filename. |
| 56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_; | |
| 57 | |
| 58 // set of characters considered invalid at either end of a filename. | |
| 59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_; | |
| 49 | 60 |
| 50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); | 61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); |
| 51 }; | 62 }; |
| 52 | 63 |
| 53 IllegalCharacters::IllegalCharacters() { | 64 IllegalCharacters::IllegalCharacters() { |
| 54 UErrorCode status = U_ZERO_ERROR; | 65 UErrorCode everywhere_status = U_ZERO_ERROR; |
| 55 // Control characters, formatting characters, non-characters, and | 66 UErrorCode ends_status = U_ZERO_ERROR; |
| 56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | 67 // Control characters, formatting characters, non-characters, path separators, |
| 68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | |
| 57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx | 69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx |
| 58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx | 70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx |
| 59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they | 71 // Note that code points in the "Other, Format" (Cf) category are ignored on |
| 60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used | 72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being |
| 61 // elsewhere, they can be confusing/problematic. | 73 // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is |
| 62 // Also, consider wrapping the set with our Singleton class to create and | 74 // also excluded due to the possibility of interacting poorly with short |
| 63 // freeze it only once. Note that there's a trade-off between memory and | 75 // filenames on VFAT. (Related to CVE-2014-9390) |
| 64 // speed. | |
| 65 #if defined(WCHAR_T_IS_UTF16) | 76 #if defined(WCHAR_T_IS_UTF16) |
| 66 set.reset(new icu::UnicodeSet(icu::UnicodeString( | 77 illegal_anywhere_.reset( |
| 67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); | 78 new icu::UnicodeSet(icu::UnicodeString(L"[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), |
| 79 everywhere_status)); | |
| 80 illegal_at_ends_.reset( | |
| 81 new icu::UnicodeSet(icu::UnicodeString(L"[[:WSpace:][.]]"), ends_status)); | |
| 68 #else | 82 #else |
| 69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( | 83 illegal_anywhere_.reset(new icu::UnicodeSet( |
| 70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), | 84 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), |
| 71 status)); | 85 everywhere_status)); |
| 86 illegal_at_ends_.reset(new icu::UnicodeSet( | |
| 87 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); | |
|
jungshik at Google
2015/01/30 23:13:25
You don't need |#if defined| block any more (actua
asanka
2015/01/30 23:36:48
Done
| |
| 72 #endif | 88 #endif |
| 73 DCHECK(U_SUCCESS(status)); | 89 DCHECK(U_SUCCESS(everywhere_status)); |
| 90 DCHECK(U_SUCCESS(ends_status)); | |
| 91 | |
| 74 // Add non-characters. If this becomes a performance bottleneck by | 92 // Add non-characters. If this becomes a performance bottleneck by |
| 75 // any chance, do not add these to |set| and change IsFilenameLegal() | 93 // any chance, do not add these to |set| and change IsFilenameLegal() |
| 76 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling | 94 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling |
| 77 // containsNone(). | 95 // IsAllowedName(). |
| 78 set->add(0xFDD0, 0xFDEF); | 96 illegal_anywhere_->add(0xFDD0, 0xFDEF); |
| 79 for (int i = 0; i <= 0x10; ++i) { | 97 for (int i = 0; i <= 0x10; ++i) { |
| 80 int plane_base = 0x10000 * i; | 98 int plane_base = 0x10000 * i; |
| 81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 99 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
| 82 } | 100 } |
| 83 set->freeze(); | 101 illegal_anywhere_->freeze(); |
| 102 illegal_at_ends_->freeze(); | |
| 84 } | 103 } |
| 85 | 104 |
| 86 } // namespace | 105 } // namespace |
| 87 | 106 |
| 88 bool IsFilenameLegal(const string16& file_name) { | 107 bool IsFilenameLegal(const string16& file_name) { |
| 89 return IllegalCharacters::GetInstance()->containsNone(file_name); | 108 return IllegalCharacters::GetInstance()->IsAllowedName(file_name); |
| 90 } | 109 } |
| 91 | 110 |
| 92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 111 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
| 93 char replace_char) { | 112 char replace_char) { |
| 94 DCHECK(file_name); | 113 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); |
| 95 | 114 |
| 96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); | 115 DCHECK(!(illegal->DisallowedEverywhere(replace_char))); |
| 116 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); | |
| 97 | 117 |
| 98 // Remove leading and trailing whitespace. | |
| 99 TrimWhitespace(*file_name, TRIM_ALL, file_name); | |
| 100 | |
| 101 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); | |
| 102 int cursor = 0; // The ICU macros expect an int. | 118 int cursor = 0; // The ICU macros expect an int. |
| 103 while (cursor < static_cast<int>(file_name->size())) { | 119 while (cursor < static_cast<int>(file_name->size())) { |
| 104 int char_begin = cursor; | 120 int char_begin = cursor; |
| 105 uint32 code_point; | 121 uint32 code_point; |
| 106 #if defined(OS_MACOSX) | 122 #if defined(OS_MACOSX) |
| 107 // Mac uses UTF-8 encoding for filenames. | 123 // Mac uses UTF-8 encoding for filenames. |
| 108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 124 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 109 code_point); | 125 code_point); |
| 110 #elif defined(OS_WIN) | 126 #elif defined(OS_WIN) |
| 111 // Windows uses UTF-16 encoding for filenames. | 127 // Windows uses UTF-16 encoding for filenames. |
| 112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 128 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 113 code_point); | 129 code_point); |
| 114 #elif defined(OS_POSIX) | 130 #elif defined(OS_POSIX) |
| 115 // Linux doesn't actually define an encoding. It basically allows anything | 131 // Linux doesn't actually define an encoding. It basically allows anything |
| 116 // except for a few special ASCII characters. | 132 // except for a few special ASCII characters. |
| 117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); | 133 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); |
| 118 if (cur_char >= 0x80) | 134 if (cur_char >= 0x80) |
| 119 continue; | 135 continue; |
| 120 code_point = cur_char; | 136 code_point = cur_char; |
| 121 #else | 137 #else |
| 122 NOTREACHED(); | 138 NOTREACHED(); |
| 123 #endif | 139 #endif |
| 124 | 140 |
| 125 if (illegal->contains(code_point)) { | 141 if (illegal->DisallowedEverywhere(code_point) || |
| 142 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && | |
| 143 illegal->DisallowedLeadingOrTrailing(code_point))) { | |
| 126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); | 144 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); |
| 127 // We just made the potentially multi-byte/word char into one that only | 145 // We just made the potentially multi-byte/word char into one that only |
| 128 // takes one byte/word, so need to adjust the cursor to point to the next | 146 // takes one byte/word, so need to adjust the cursor to point to the next |
| 129 // character again. | 147 // character again. |
| 130 cursor = char_begin + 1; | 148 cursor = char_begin + 1; |
| 131 } | 149 } |
| 132 } | 150 } |
| 133 } | 151 } |
| 134 | 152 |
| 135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 153 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), | 181 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), |
| 164 kCodepageUTF8, | 182 kCodepageUTF8, |
| 165 &normalized_str)) { | 183 &normalized_str)) { |
| 166 *file_name = file_name->DirName().Append(FilePath(normalized_str)); | 184 *file_name = file_name->DirName().Append(FilePath(normalized_str)); |
| 167 } | 185 } |
| 168 #endif | 186 #endif |
| 169 } | 187 } |
| 170 | 188 |
| 171 } // namespace i18n | 189 } // namespace i18n |
| 172 } // namespace base | 190 } // namespace base |
| OLD | NEW |