Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Side by Side Diff: base/i18n/file_util_icu.cc

Issue 895853003: Update from https://crrev.com/314320 (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // File utilities that use the ICU library go in this file. 5 // File utilities that use the ICU library go in this file.
6 6
7 #include "base/i18n/file_util_icu.h" 7 #include "base/i18n/file_util_icu.h"
8 8
9 #include "base/files/file_path.h" 9 #include "base/files/file_path.h"
10 #include "base/i18n/icu_string_conversions.h" 10 #include "base/i18n/icu_string_conversions.h"
(...skipping 12 matching lines...) Expand all
23 namespace i18n { 23 namespace i18n {
24 24
25 namespace { 25 namespace {
26 26
27 class IllegalCharacters { 27 class IllegalCharacters {
28 public: 28 public:
29 static IllegalCharacters* GetInstance() { 29 static IllegalCharacters* GetInstance() {
30 return Singleton<IllegalCharacters>::get(); 30 return Singleton<IllegalCharacters>::get();
31 } 31 }
32 32
33 bool contains(UChar32 ucs4) { 33 bool DisallowedEverywhere(UChar32 ucs4) {
34 return !!set->contains(ucs4); 34 return !!illegal_anywhere_->contains(ucs4);
35 } 35 }
36 36
37 bool containsNone(const string16 &s) { 37 bool DisallowedLeadingOrTrailing(UChar32 ucs4) {
38 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); 38 return !!illegal_at_ends_->contains(ucs4);
39 }
40
41 bool IsAllowedName(const string16& s) {
42 return s.empty() || (!!illegal_anywhere_->containsNone(
43 icu::UnicodeString(s.c_str(), s.size())) &&
44 !illegal_at_ends_->contains(*s.begin()) &&
45 !illegal_at_ends_->contains(*s.rbegin()));
39 } 46 }
40 47
41 private: 48 private:
42 friend class Singleton<IllegalCharacters>; 49 friend class Singleton<IllegalCharacters>;
43 friend struct DefaultSingletonTraits<IllegalCharacters>; 50 friend struct DefaultSingletonTraits<IllegalCharacters>;
44 51
45 IllegalCharacters(); 52 IllegalCharacters();
46 ~IllegalCharacters() { } 53 ~IllegalCharacters() { }
47 54
48 scoped_ptr<icu::UnicodeSet> set; 55 // set of characters considered invalid anywhere inside a filename.
56 scoped_ptr<icu::UnicodeSet> illegal_anywhere_;
57
58 // set of characters considered invalid at either end of a filename.
59 scoped_ptr<icu::UnicodeSet> illegal_at_ends_;
49 60
50 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); 61 DISALLOW_COPY_AND_ASSIGN(IllegalCharacters);
51 }; 62 };
52 63
53 IllegalCharacters::IllegalCharacters() { 64 IllegalCharacters::IllegalCharacters() {
54 UErrorCode status = U_ZERO_ERROR; 65 UErrorCode everywhere_status = U_ZERO_ERROR;
55 // Control characters, formatting characters, non-characters, and 66 UErrorCode ends_status = U_ZERO_ERROR;
56 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). 67 // Control characters, formatting characters, non-characters, path separators,
68 // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
57 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx 69 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
58 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx 70 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
59 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they 71 // Note that code points in the "Other, Format" (Cf) category are ignored on
60 // are legitimate in Arabic and some S/SE Asian scripts. However, when used 72 // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being
61 // elsewhere, they can be confusing/problematic. 73 // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is
62 // Also, consider wrapping the set with our Singleton class to create and 74 // also excluded due to the possibility of interacting poorly with short
63 // freeze it only once. Note that there's a trade-off between memory and 75 // filenames on VFAT. (Related to CVE-2014-9390)
64 // speed. 76 illegal_anywhere_.reset(new icu::UnicodeSet(
65 #if defined(WCHAR_T_IS_UTF16) 77 UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"),
66 set.reset(new icu::UnicodeSet(icu::UnicodeString( 78 everywhere_status));
67 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status)); 79 illegal_at_ends_.reset(new icu::UnicodeSet(
68 #else 80 UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status));
69 set.reset(new icu::UnicodeSet(UNICODE_STRING_SIMPLE( 81 DCHECK(U_SUCCESS(everywhere_status));
70 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), 82 DCHECK(U_SUCCESS(ends_status));
71 status)); 83
72 #endif
73 DCHECK(U_SUCCESS(status));
74 // Add non-characters. If this becomes a performance bottleneck by 84 // Add non-characters. If this becomes a performance bottleneck by
75 // any chance, do not add these to |set| and change IsFilenameLegal() 85 // any chance, do not add these to |set| and change IsFilenameLegal()
76 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling 86 // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling
77 // containsNone(). 87 // IsAllowedName().
78 set->add(0xFDD0, 0xFDEF); 88 illegal_anywhere_->add(0xFDD0, 0xFDEF);
79 for (int i = 0; i <= 0x10; ++i) { 89 for (int i = 0; i <= 0x10; ++i) {
80 int plane_base = 0x10000 * i; 90 int plane_base = 0x10000 * i;
81 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); 91 illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF);
82 } 92 }
83 set->freeze(); 93 illegal_anywhere_->freeze();
94 illegal_at_ends_->freeze();
84 } 95 }
85 96
86 } // namespace 97 } // namespace
87 98
88 bool IsFilenameLegal(const string16& file_name) { 99 bool IsFilenameLegal(const string16& file_name) {
89 return IllegalCharacters::GetInstance()->containsNone(file_name); 100 return IllegalCharacters::GetInstance()->IsAllowedName(file_name);
90 } 101 }
91 102
92 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, 103 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
93 char replace_char) { 104 char replace_char) {
94 DCHECK(file_name); 105 IllegalCharacters* illegal = IllegalCharacters::GetInstance();
95 106
96 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); 107 DCHECK(!(illegal->DisallowedEverywhere(replace_char)));
108 DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char)));
97 109
98 // Remove leading and trailing whitespace.
99 TrimWhitespace(*file_name, TRIM_ALL, file_name);
100
101 IllegalCharacters* illegal = IllegalCharacters::GetInstance();
102 int cursor = 0; // The ICU macros expect an int. 110 int cursor = 0; // The ICU macros expect an int.
103 while (cursor < static_cast<int>(file_name->size())) { 111 while (cursor < static_cast<int>(file_name->size())) {
104 int char_begin = cursor; 112 int char_begin = cursor;
105 uint32 code_point; 113 uint32 code_point;
106 #if defined(OS_MACOSX) 114 #if defined(OS_MACOSX)
107 // Mac uses UTF-8 encoding for filenames. 115 // Mac uses UTF-8 encoding for filenames.
108 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), 116 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
109 code_point); 117 code_point);
110 #elif defined(OS_WIN) 118 #elif defined(OS_WIN)
111 // Windows uses UTF-16 encoding for filenames. 119 // Windows uses UTF-16 encoding for filenames.
112 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), 120 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
113 code_point); 121 code_point);
114 #elif defined(OS_POSIX) 122 #elif defined(OS_POSIX)
115 // Linux doesn't actually define an encoding. It basically allows anything 123 // Linux doesn't actually define an encoding. It basically allows anything
116 // except for a few special ASCII characters. 124 // except for a few special ASCII characters.
117 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); 125 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);
118 if (cur_char >= 0x80) 126 if (cur_char >= 0x80)
119 continue; 127 continue;
120 code_point = cur_char; 128 code_point = cur_char;
121 #else 129 #else
122 NOTREACHED(); 130 NOTREACHED();
123 #endif 131 #endif
124 132
125 if (illegal->contains(code_point)) { 133 if (illegal->DisallowedEverywhere(code_point) ||
134 ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) &&
135 illegal->DisallowedLeadingOrTrailing(code_point))) {
126 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); 136 file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
127 // We just made the potentially multi-byte/word char into one that only 137 // We just made the potentially multi-byte/word char into one that only
128 // takes one byte/word, so need to adjust the cursor to point to the next 138 // takes one byte/word, so need to adjust the cursor to point to the next
129 // character again. 139 // character again.
130 cursor = char_begin + 1; 140 cursor = char_begin + 1;
131 } 141 }
132 } 142 }
133 } 143 }
134 144
135 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { 145 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
(...skipping 27 matching lines...) Expand all
163 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), 173 if (ConvertToUtf8AndNormalize(file_name->BaseName().value(),
164 kCodepageUTF8, 174 kCodepageUTF8,
165 &normalized_str)) { 175 &normalized_str)) {
166 *file_name = file_name->DirName().Append(FilePath(normalized_str)); 176 *file_name = file_name->DirName().Append(FilePath(normalized_str));
167 } 177 }
168 #endif 178 #endif
169 } 179 }
170 180
171 } // namespace i18n 181 } // namespace i18n
172 } // namespace base 182 } // namespace base
OLDNEW
« no previous file with comments | « base/i18n/file_util_icu.h ('k') | base/i18n/file_util_icu_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698