OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
6 | 6 |
7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
8 | 8 |
9 #include "base/files/file_path.h" | 9 #include "base/files/file_path.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 12 matching lines...) Expand all Loading... | |
23 namespace i18n { | 23 namespace i18n { |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 class IllegalCharacters { | 27 class IllegalCharacters { |
28 public: | 28 public: |
29 static IllegalCharacters* GetInstance() { | 29 static IllegalCharacters* GetInstance() { |
30 return Singleton<IllegalCharacters>::get(); | 30 return Singleton<IllegalCharacters>::get(); |
31 } | 31 } |
32 | 32 |
33 bool contains(UChar32 ucs4) { | 33 bool DisallowedEverywhere(UChar32 ucs4) { |
34 return !!set->contains(ucs4); | 34 return !!illegal_anywhere_->contains(ucs4); |
35 } | 35 } |
36 | 36 |
37 bool containsNone(const string16 &s) { | 37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) { |
38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 38 return !!illegal_at_ends_->contains(ucs4); |
39 } | |
40 | |
41 bool IsAllowedName(const string16& s) { | |
42 return s.empty() || (!!illegal_anywhere_->containsNone( | |
43 icu::UnicodeString(s.c_str(), s.size())) && | |
44 !illegal_at_ends_->contains(*s.begin()) && | |
45 !illegal_at_ends_->contains(*s.rbegin())); | |
39 } | 46 } |
40 | 47 |
41 private: | 48 private: |
42 friend class Singleton<IllegalCharacters>; | 49 friend class Singleton<IllegalCharacters>; |
43 friend struct DefaultSingletonTraits<IllegalCharacters>; | 50 friend struct DefaultSingletonTraits<IllegalCharacters>; |
44 | 51 |
45 IllegalCharacters(); | 52 IllegalCharacters(); |
46 ~IllegalCharacters() { } | 53 ~IllegalCharacters() { } |
47 | 54 |
48 scoped_ptr<icu::UnicodeSet> set; | 55 // set of characters considered invalid anywhere inside a filename. |
56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_; | |
57 | |
58 // set of characters considered invalid at either end of a filename. | |
59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_; | |
49 | 60 |
50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); | 61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); |
51 }; | 62 }; |
52 | 63 |
53 IllegalCharacters::IllegalCharacters() { | 64 IllegalCharacters::IllegalCharacters() { |
54 UErrorCode status = U_ZERO_ERROR; | 65 UErrorCode everywhere_status = U_ZERO_ERROR; |
55 // Control characters, formatting characters, non-characters, and | 66 UErrorCode ends_status = U_ZERO_ERROR; |
56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | 67 // Control characters, formatting characters, non-characters, path separators, |
68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | |
57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx | 69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx |
58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx | 70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx |
59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they | 71 // Note that code points in the "Other, Format" (Cf) category are ignored on |
60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used | 72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being |
61 // elsewhere, they can be confusing/problematic. | 73 // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is |
62 // Also, consider wrapping the set with our Singleton class to create and | 74 // also excluded due to the possibility of interacting poorly with short |
63 // freeze it only once. Note that there's a trade-off between memory and | 75 // filenames on VFAT. (Related to CVE-2014-9390) |
64 // speed. | |
65 #if defined(WCHAR_T_IS_UTF16) | 76 #if defined(WCHAR_T_IS_UTF16) |
66 set.reset(new icu::UnicodeSet(icu::UnicodeString( | 77 illegal_anywhere_.reset( |
67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); | 78 new icu::UnicodeSet(icu::UnicodeString(L"[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), |
79 everywhere_status)); | |
80 illegal_at_ends_.reset( | |
81 new icu::UnicodeSet(icu::UnicodeString(L"[[:WSpace:][.]]"), ends_status)); | |
68 #else | 82 #else |
69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( | 83 illegal_anywhere_.reset(new icu::UnicodeSet( |
70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), | 84 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), |
71 status)); | 85 everywhere_status)); |
86 illegal_at_ends_.reset(new icu::UnicodeSet( | |
87 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); | |
jungshik at Google
2015/01/30 23:13:25
You don't need |#if defined| block any more (actua
asanka
2015/01/30 23:36:48
Done
| |
72 #endif | 88 #endif |
73 DCHECK(U_SUCCESS(status)); | 89 DCHECK(U_SUCCESS(everywhere_status)); |
90 DCHECK(U_SUCCESS(ends_status)); | |
91 | |
74 // Add non-characters. If this becomes a performance bottleneck by | 92 // Add non-characters. If this becomes a performance bottleneck by |
75 // any chance, do not add these to |set| and change IsFilenameLegal() | 93 // any chance, do not add these to |set| and change IsFilenameLegal() |
76 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling | 94 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling |
77 // containsNone(). | 95 // IsAllowedName(). |
78 set->add(0xFDD0, 0xFDEF); | 96 illegal_anywhere_->add(0xFDD0, 0xFDEF); |
79 for (int i = 0; i <= 0x10; ++i) { | 97 for (int i = 0; i <= 0x10; ++i) { |
80 int plane_base = 0x10000 * i; | 98 int plane_base = 0x10000 * i; |
81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 99 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
82 } | 100 } |
83 set->freeze(); | 101 illegal_anywhere_->freeze(); |
102 illegal_at_ends_->freeze(); | |
84 } | 103 } |
85 | 104 |
86 } // namespace | 105 } // namespace |
87 | 106 |
88 bool IsFilenameLegal(const string16& file_name) { | 107 bool IsFilenameLegal(const string16& file_name) { |
89 return IllegalCharacters::GetInstance()->containsNone(file_name); | 108 return IllegalCharacters::GetInstance()->IsAllowedName(file_name); |
90 } | 109 } |
91 | 110 |
92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 111 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
93 char replace_char) { | 112 char replace_char) { |
94 DCHECK(file_name); | 113 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); |
95 | 114 |
96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); | 115 DCHECK(!(illegal->DisallowedEverywhere(replace_char))); |
116 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); | |
97 | 117 |
98 // Remove leading and trailing whitespace. | |
99 TrimWhitespace(*file_name, TRIM_ALL, file_name); | |
100 | |
101 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); | |
102 int cursor = 0; // The ICU macros expect an int. | 118 int cursor = 0; // The ICU macros expect an int. |
103 while (cursor < static_cast<int>(file_name->size())) { | 119 while (cursor < static_cast<int>(file_name->size())) { |
104 int char_begin = cursor; | 120 int char_begin = cursor; |
105 uint32 code_point; | 121 uint32 code_point; |
106 #if defined(OS_MACOSX) | 122 #if defined(OS_MACOSX) |
107 // Mac uses UTF-8 encoding for filenames. | 123 // Mac uses UTF-8 encoding for filenames. |
108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 124 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
109 code_point); | 125 code_point); |
110 #elif defined(OS_WIN) | 126 #elif defined(OS_WIN) |
111 // Windows uses UTF-16 encoding for filenames. | 127 // Windows uses UTF-16 encoding for filenames. |
112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 128 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
113 code_point); | 129 code_point); |
114 #elif defined(OS_POSIX) | 130 #elif defined(OS_POSIX) |
115 // Linux doesn't actually define an encoding. It basically allows anything | 131 // Linux doesn't actually define an encoding. It basically allows anything |
116 // except for a few special ASCII characters. | 132 // except for a few special ASCII characters. |
117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); | 133 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); |
118 if (cur_char >= 0x80) | 134 if (cur_char >= 0x80) |
119 continue; | 135 continue; |
120 code_point = cur_char; | 136 code_point = cur_char; |
121 #else | 137 #else |
122 NOTREACHED(); | 138 NOTREACHED(); |
123 #endif | 139 #endif |
124 | 140 |
125 if (illegal->contains(code_point)) { | 141 if (illegal->DisallowedEverywhere(code_point) || |
142 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && | |
143 illegal->DisallowedLeadingOrTrailing(code_point))) { | |
126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); | 144 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); |
127 // We just made the potentially multi-byte/word char into one that only | 145 // We just made the potentially multi-byte/word char into one that only |
128 // takes one byte/word, so need to adjust the cursor to point to the next | 146 // takes one byte/word, so need to adjust the cursor to point to the next |
129 // character again. | 147 // character again. |
130 cursor = char_begin + 1; | 148 cursor = char_begin + 1; |
131 } | 149 } |
132 } | 150 } |
133 } | 151 } |
134 | 152 |
135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 153 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
(...skipping 27 matching lines...) Expand all Loading... | |
163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), | 181 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), |
164 kCodepageUTF8, | 182 kCodepageUTF8, |
165 &normalized_str)) { | 183 &normalized_str)) { |
166 *file_name = file_name->DirName().Append(FilePath(normalized_str)); | 184 *file_name = file_name->DirName().Append(FilePath(normalized_str)); |
167 } | 185 } |
168 #endif | 186 #endif |
169 } | 187 } |
170 | 188 |
171 } // namespace i18n | 189 } // namespace i18n |
172 } // namespace base | 190 } // namespace base |
OLD | NEW |