Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(652)

Side by Side Diff: base/i18n/file_util_icu.cc

Issue 869823003: Update ReplaceIllegalCharactersInPath to handle quirks in HFS+ and VFS (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // File utilities that use the ICU library go in this file. 5 // File utilities that use the ICU library go in this file.
6 6
7 #include "base/i18n/file_util_icu.h" 7 #include "base/i18n/file_util_icu.h"
8 8
9 #include "base/files/file_path.h" 9 #include "base/files/file_path.h"
10 #include "base/i18n/icu_string_conversions.h" 10 #include "base/i18n/icu_string_conversions.h"
(...skipping 12 matching lines...) Expand all
23 namespace i18n { 23 namespace i18n {
24 24
25 namespace { 25 namespace {
26 26
27 class IllegalCharacters { 27 class IllegalCharacters {
28 public: 28 public:
29 static IllegalCharacters* GetInstance() { 29 static IllegalCharacters* GetInstance() {
30 return Singleton<IllegalCharacters>::get(); 30 return Singleton<IllegalCharacters>::get();
31 } 31 }
32 32
33 bool contains(UChar32 ucs4) { 33 bool DisallowedEverywhere(UChar32 ucs4) {
34 return !!set->contains(ucs4); 34 return !!illegal_anywhere_->contains(ucs4);
35 } 35 }
36 36
37 bool containsNone(const string16 &s) { 37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) {
38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); 38 return !!illegal_at_ends_->contains(ucs4);
39 }
40
41 bool IsAllowedName(const string16& s) {
42 return s.empty() || (!!illegal_anywhere_->containsNone(
43 icu::UnicodeString(s.c_str(), s.size())) &&
44 !illegal_at_ends_->contains(*s.begin()) &&
45 !illegal_at_ends_->contains(*s.rbegin()));
39 } 46 }
40 47
41 private: 48 private:
42 friend class Singleton<IllegalCharacters>; 49 friend class Singleton<IllegalCharacters>;
43 friend struct DefaultSingletonTraits<IllegalCharacters>; 50 friend struct DefaultSingletonTraits<IllegalCharacters>;
44 51
45 IllegalCharacters(); 52 IllegalCharacters();
46 ~IllegalCharacters() { } 53 ~IllegalCharacters() { }
47 54
48 scoped_ptr<icu::UnicodeSet> set; 55 // set of characters considered invalid anywhere inside a filename.
56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_;
57
58 // set of characters considered invalid at either end of a filename.
59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_;
49 60
50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); 61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);
51 }; 62 };
52 63
53 IllegalCharacters::IllegalCharacters() { 64 IllegalCharacters::IllegalCharacters() {
54 UErrorCode status = U_ZERO_ERROR; 65 UErrorCode everywhere_status = U_ZERO_ERROR;
55 // Control characters, formatting characters, non-characters, and 66 UErrorCode ends_status = U_ZERO_ERROR;
56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). 67 // Control characters, formatting characters, non-characters, path separators,
68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx 69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx 70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they 71 // Note that code points in the "Other, Format" (Cf) category are ignored on
60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used 72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being
61 // elsewhere, they can be confusing/problematic. 73 // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is
62 // Also, consider wrapping the set with our Singleton class to create and 74 // also excluded due to the possibility of interacting poorly with short
63 // freeze it only once. Note that there's a trade-off between memory and 75 // filenames on VFAT. (Related to CVE-2014-9390)
64 // speed.
65 #if defined(WCHAR_T_IS_UTF16) 76 #if defined(WCHAR_T_IS_UTF16)
66 set.reset(new icu::UnicodeSet(icu::UnicodeString( 77 illegal_anywhere_.reset(
67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); 78 new icu::UnicodeSet(icu::UnicodeString(L"[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),
79 everywhere_status));
80 illegal_at_ends_.reset(
81 new icu::UnicodeSet(icu::UnicodeString(L"[[:WSpace:][.]]"), ends_status));
68 #else 82 #else
69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( 83 illegal_anywhere_.reset(new icu::UnicodeSet(
70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), 84 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),
71 status)); 85 everywhere_status));
86 illegal_at_ends_.reset(new icu::UnicodeSet(
87 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status));
jungshik at Google 2015/01/30 23:13:25 You don't need |#if defined| block any more (actua
asanka 2015/01/30 23:36:48 Done
72 #endif 88 #endif
73 DCHECK(U_SUCCESS(status)); 89 DCHECK(U_SUCCESS(everywhere_status));
90 DCHECK(U_SUCCESS(ends_status));
91
74 // Add non-characters. If this becomes a performance bottleneck by 92 // Add non-characters. If this becomes a performance bottleneck by
75 // any chance, do not add these to |set| and change IsFilenameLegal() 93 // any chance, do not add these to |illegal_anywhere_| and change IsFilenameLegal()
76 // to check |(ucs4 & 0xFFFEu) == 0xFFFEu|, in addition to calling 94 // to check |(ucs4 & 0xFFFEu) == 0xFFFEu|, in addition to calling
77 // containsNone(). 95 // IsAllowedName().
78 set->add(0xFDD0, 0xFDEF); 96 illegal_anywhere_->add(0xFDD0, 0xFDEF);
79 for (int i = 0; i <= 0x10; ++i) { 97 for (int i = 0; i <= 0x10; ++i) {
80 int plane_base = 0x10000 * i; 98 int plane_base = 0x10000 * i;
81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); 99 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF);
82 } 100 }
83 set->freeze(); 101 illegal_anywhere_->freeze();
102 illegal_at_ends_->freeze();
84 } 103 }
85 104
86 } // namespace 105 } // namespace
87 106
88 bool IsFilenameLegal(const string16& file_name) { 107 bool IsFilenameLegal(const string16& file_name) {
89 return IllegalCharacters::GetInstance()->containsNone(file_name); 108 return IllegalCharacters::GetInstance()->IsAllowedName(file_name);
90 } 109 }
91 110
92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, 111 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
93 char replace_char) { 112 char replace_char) {
94 DCHECK(file_name); 113 IllegalCharacters* illegal = IllegalCharacters::GetInstance();
95 114
96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); 115 DCHECK(!(illegal->DisallowedEverywhere(replace_char)));
116 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char)));
97 117
98 // Remove leading and trailing whitespace.
99 TrimWhitespace(*file_name, TRIM_ALL, file_name);
100
101 IllegalCharacters* illegal = IllegalCharacters::GetInstance();
102 int cursor = 0; // The ICU macros expect an int. 118 int cursor = 0; // The ICU macros expect an int.
103 while (cursor < static_cast<int>(file_name->size())) { 119 while (cursor < static_cast<int>(file_name->size())) {
104 int char_begin = cursor; 120 int char_begin = cursor;
105 uint32 code_point; 121 uint32 code_point;
106 #if defined(OS_MACOSX) 122 #if defined(OS_MACOSX)
107 // Mac uses UTF-8 encoding for filenames. 123 // Mac uses UTF-8 encoding for filenames.
108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), 124 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
109 code_point); 125 code_point);
110 #elif defined(OS_WIN) 126 #elif defined(OS_WIN)
111 // Windows uses UTF-16 encoding for filenames. 127 // Windows uses UTF-16 encoding for filenames.
112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), 128 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
113 code_point); 129 code_point);
114 #elif defined(OS_POSIX) 130 #elif defined(OS_POSIX)
115 // Linux doesn't actually define an encoding. It basically allows anything 131 // Linux doesn't actually define an encoding. It basically allows anything
116 // except for a few special ASCII characters. 132 // except for a few special ASCII characters.
117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); 133 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);
118 if (cur_char >= 0x80) 134 if (cur_char >= 0x80)
119 continue; 135 continue;
120 code_point = cur_char; 136 code_point = cur_char;
121 #else 137 #else
122 NOTREACHED(); 138 NOTREACHED();
123 #endif 139 #endif
124 140
125 if (illegal->contains(code_point)) { 141 if (illegal->DisallowedEverywhere(code_point) ||
142 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) &&
143 illegal->DisallowedLeadingOrTrailing(code_point))) {
126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); 144 file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
127 // We just made the potentially multi-byte/word char into one that only 145 // We just made the potentially multi-byte/word char into one that only
128 // takes one byte/word, so need to adjust the cursor to point to the next 146 // takes one byte/word, so need to adjust the cursor to point to the next
129 // character again. 147 // character again.
130 cursor = char_begin + 1; 148 cursor = char_begin + 1;
131 } 149 }
132 } 150 }
133 } 151 }
134 152
135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { 153 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
(...skipping 27 matching lines...) Expand all
163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), 181 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(),
164 kCodepageUTF8, 182 kCodepageUTF8,
165 &normalized_str)) { 183 &normalized_str)) {
166 *file_name = file_name->DirName().Append(FilePath(normalized_str)); 184 *file_name = file_name->DirName().Append(FilePath(normalized_str));
167 } 185 }
168 #endif 186 #endif
169 } 187 }
170 188
171 } // namespace i18n 189 } // namespace i18n
172 } // namespace base 190 } // namespace base
OLDNEW
« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698