OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
6 | 6 |
7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
8 | 8 |
9 #include "base/files/file_path.h" | 9 #include "base/files/file_path.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 12 matching lines...) Expand all Loading... |
23 namespace i18n { | 23 namespace i18n { |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 class IllegalCharacters { | 27 class IllegalCharacters { |
28 public: | 28 public: |
29 static IllegalCharacters* GetInstance() { | 29 static IllegalCharacters* GetInstance() { |
30 return Singleton<IllegalCharacters>::get(); | 30 return Singleton<IllegalCharacters>::get(); |
31 } | 31 } |
32 | 32 |
33 bool contains(UChar32 ucs4) { | 33 bool DisallowedEverywhere(UChar32 ucs4) { |
34 return !!set->contains(ucs4); | 34 return !!illegal_anywhere_->contains(ucs4); |
35 } | 35 } |
36 | 36 |
37 bool containsNone(const string16 &s) { | 37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) { |
38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 38 return !!illegal_at_ends_->contains(ucs4); |
| 39 } |
| 40 |
| 41 bool IsAllowedName(const string16& s) { |
| 42 return s.empty() || (!!illegal_anywhere_->containsNone( |
| 43 icu::UnicodeString(s.c_str(), s.size())) && |
| 44 !illegal_at_ends_->contains(*s.begin()) && |
| 45 !illegal_at_ends_->contains(*s.rbegin())); |
39 } | 46 } |
40 | 47 |
41 private: | 48 private: |
42 friend class Singleton<IllegalCharacters>; | 49 friend class Singleton<IllegalCharacters>; |
43 friend struct DefaultSingletonTraits<IllegalCharacters>; | 50 friend struct DefaultSingletonTraits<IllegalCharacters>; |
44 | 51 |
45 IllegalCharacters(); | 52 IllegalCharacters(); |
46 ~IllegalCharacters() { } | 53 ~IllegalCharacters() { } |
47 | 54 |
48 scoped_ptr<icu::UnicodeSet> set; | 55 // set of characters considered invalid anywhere inside a filename. |
| 56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_; |
| 57 |
| 58 // set of characters considered invalid at either end of a filename. |
| 59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_; |
49 | 60 |
50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); | 61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); |
51 }; | 62 }; |
52 | 63 |
53 IllegalCharacters::IllegalCharacters() { | 64 IllegalCharacters::IllegalCharacters() { |
54 UErrorCode status = U_ZERO_ERROR; | 65 UErrorCode everywhere_status = U_ZERO_ERROR; |
55 // Control characters, formatting characters, non-characters, and | 66 UErrorCode ends_status = U_ZERO_ERROR; |
56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | 67 // Control characters, formatting characters, non-characters, path separators, |
| 68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). |
57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx | 69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx |
58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx | 70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx |
59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they | 71 // Note that code points in the "Other, Format" (Cf) category are ignored on |
60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used | 72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being |
61 // elsewhere, they can be confusing/problematic. | 73 // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is |
62 // Also, consider wrapping the set with our Singleton class to create and | 74 // also excluded due to the possibility of interacting poorly with short |
63 // freeze it only once. Note that there's a trade-off between memory and | 75 // filenames on VFAT. (Related to CVE-2014-9390) |
64 // speed. | 76 illegal_anywhere_.reset(new icu::UnicodeSet( |
65 #if defined(WCHAR_T_IS_UTF16) | 77 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), |
66 set.reset(new icu::UnicodeSet(icu::UnicodeString( | 78 everywhere_status)); |
67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); | 79 illegal_at_ends_.reset(new icu::UnicodeSet( |
68 #else | 80 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); |
69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( | 81 DCHECK(U_SUCCESS(everywhere_status)); |
70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), | 82 DCHECK(U_SUCCESS(ends_status)); |
71 status)); | 83 |
72 #endif | |
73 DCHECK(U_SUCCESS(status)); | |
74 // Add non-characters. If this becomes a performance bottleneck by | 84 // Add non-characters. If this becomes a performance bottleneck by |
75 // any chance, do not add these to |set| and change IsFilenameLegal() | 85 // any chance, do not add these to |illegal_anywhere_| and change IsFilenameLegal() |
76 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling | 86 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling |
77 // containsNone(). | 87 // IsAllowedName(). |
78 set->add(0xFDD0, 0xFDEF); | 88 illegal_anywhere_->add(0xFDD0, 0xFDEF); |
79 for (int i = 0; i <= 0x10; ++i) { | 89 for (int i = 0; i <= 0x10; ++i) { |
80 int plane_base = 0x10000 * i; | 90 int plane_base = 0x10000 * i; |
81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 91 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
82 } | 92 } |
83 set->freeze(); | 93 illegal_anywhere_->freeze(); |
| 94 illegal_at_ends_->freeze(); |
84 } | 95 } |
85 | 96 |
86 } // namespace | 97 } // namespace |
87 | 98 |
88 bool IsFilenameLegal(const string16& file_name) { | 99 bool IsFilenameLegal(const string16& file_name) { |
89 return IllegalCharacters::GetInstance()->containsNone(file_name); | 100 return IllegalCharacters::GetInstance()->IsAllowedName(file_name); |
90 } | 101 } |
91 | 102 |
92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 103 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
93 char replace_char) { | 104 char replace_char) { |
94 DCHECK(file_name); | 105 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); |
95 | 106 |
96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); | 107 DCHECK(!(illegal->DisallowedEverywhere(replace_char))); |
| 108 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); |
97 | 109 |
98 // Remove leading and trailing whitespace. | |
99 TrimWhitespace(*file_name, TRIM_ALL, file_name); | |
100 | |
101 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); | |
102 int cursor = 0; // The ICU macros expect an int. | 110 int cursor = 0; // The ICU macros expect an int. |
103 while (cursor < static_cast<int>(file_name->size())) { | 111 while (cursor < static_cast<int>(file_name->size())) { |
104 int char_begin = cursor; | 112 int char_begin = cursor; |
105 uint32 code_point; | 113 uint32 code_point; |
106 #if defined(OS_MACOSX) | 114 #if defined(OS_MACOSX) |
107 // Mac uses UTF-8 encoding for filenames. | 115 // Mac uses UTF-8 encoding for filenames. |
108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 116 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
109 code_point); | 117 code_point); |
110 #elif defined(OS_WIN) | 118 #elif defined(OS_WIN) |
111 // Windows uses UTF-16 encoding for filenames. | 119 // Windows uses UTF-16 encoding for filenames. |
112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 120 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
113 code_point); | 121 code_point); |
114 #elif defined(OS_POSIX) | 122 #elif defined(OS_POSIX) |
115 // Linux doesn't actually define an encoding. It basically allows anything | 123 // Linux doesn't actually define an encoding. It basically allows anything |
116 // except for a few special ASCII characters. | 124 // except for a few special ASCII characters. |
117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); | 125 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); |
118 if (cur_char >= 0x80) | 126 if (cur_char >= 0x80) |
119 continue; | 127 continue; |
120 code_point = cur_char; | 128 code_point = cur_char; |
121 #else | 129 #else |
122 NOTREACHED(); | 130 NOTREACHED(); |
123 #endif | 131 #endif |
124 | 132 |
125 if (illegal->contains(code_point)) { | 133 if (illegal->DisallowedEverywhere(code_point) || |
| 134 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && |
| 135 illegal->DisallowedLeadingOrTrailing(code_point))) { |
126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); | 136 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); |
127 // We just made the potentially multi-byte/word char into one that only | 137 // We just made the potentially multi-byte/word char into one that only |
128 // takes one byte/word, so need to adjust the cursor to point to the next | 138 // takes one byte/word, so need to adjust the cursor to point to the next |
129 // character again. | 139 // character again. |
130 cursor = char_begin + 1; | 140 cursor = char_begin + 1; |
131 } | 141 } |
132 } | 142 } |
133 } | 143 } |
134 | 144 |
135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 145 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
(...skipping 27 matching lines...) Expand all Loading... |
163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), | 173 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), |
164 kCodepageUTF8, | 174 kCodepageUTF8, |
165 &normalized_str)) { | 175 &normalized_str)) { |
166 *file_name = file_name->DirName().Append(FilePath(normalized_str)); | 176 *file_name = file_name->DirName().Append(FilePath(normalized_str)); |
167 } | 177 } |
168 #endif | 178 #endif |
169 } | 179 } |
170 | 180 |
171 } // namespace i18n | 181 } // namespace i18n |
172 } // namespace base | 182 } // namespace base |
OLD | NEW |