OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
6 | 6 |
7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
8 | 8 |
9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
10 #include "base/scoped_ptr.h" | 10 #include "base/scoped_ptr.h" |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
117 }; | 117 }; |
118 | 118 |
119 } // namespace | 119 } // namespace |
120 | 120 |
121 namespace file_util { | 121 namespace file_util { |
122 | 122 |
123 bool IsFilenameLegal(const string16& file_name) { | 123 bool IsFilenameLegal(const string16& file_name) { |
124 return Singleton<IllegalCharacters>()->containsNone(file_name); | 124 return Singleton<IllegalCharacters>()->containsNone(file_name); |
125 } | 125 } |
126 | 126 |
127 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { | 127 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
| 128 char replace_char) { |
128 DCHECK(file_name); | 129 DCHECK(file_name); |
129 | 130 |
130 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) && | 131 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); |
131 replace_char < 0x10000); | |
132 | 132 |
133 // Remove leading and trailing whitespace. | 133 // Remove leading and trailing whitespace. |
134 TrimWhitespace(*file_name, TRIM_ALL, file_name); | 134 TrimWhitespace(*file_name, TRIM_ALL, file_name); |
135 | 135 |
136 if (IsFilenameLegal(WideToUTF16(*file_name))) | 136 IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); |
137 return; | 137 int cursor = 0; // The ICU macros expect an int. |
| 138 while (cursor < static_cast<int>(file_name->size())) { |
| 139 int char_begin = cursor; |
| 140 uint32 code_point; |
| 141 #if defined(OS_MACOSX) |
| 142 // Mac uses UTF-8 encoding for filenames. |
| 143 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 144 code_point); |
| 145 #elif defined(OS_WIN) |
| 146 // Windows uses UTF-16 encoding for filenames. |
| 147 U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 148 code_point); |
| 149 #elif defined(OS_LINUX) |
| 150 // Linux doesn't actually define an encoding. It basically allows anything |
| 151 // except for a few special ASCII characters. |
| 152 unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); |
| 153 if (cur_char >= 0x80) |
| 154 continue; |
| 155 code_point = cur_char; |
| 156 #else |
| 157 NOTREACHED(); |
| 158 #endif |
138 | 159 |
139 std::wstring::size_type i = 0; | 160 if (illegal->contains(code_point)) { |
140 std::wstring::size_type length = file_name->size(); | 161 file_name->replace(char_begin, cursor - char_begin, 1, replace_char); |
141 const wchar_t* wstr = file_name->data(); | 162 // We just made the potentially multi-byte/word char into one that only |
142 #if defined(WCHAR_T_IS_UTF16) | 163 // takes one byte/word, so need to adjust the cursor to point to the next |
143 // Using |span| method of UnicodeSet might speed things up a bit, but | 164 // character again. |
144 // it's not likely to matter here. | 165 cursor = char_begin + 1; |
145 std::wstring temp; | |
146 temp.reserve(length); | |
147 while (i < length) { | |
148 UChar32 ucs4; | |
149 std::wstring::size_type prev = i; | |
150 U16_NEXT(wstr, i, length, ucs4); | |
151 if (Singleton<IllegalCharacters>()->contains(ucs4)) { | |
152 temp.push_back(replace_char); | |
153 } else if (ucs4 < 0x10000) { | |
154 temp.push_back(ucs4); | |
155 } else { | |
156 temp.push_back(wstr[prev]); | |
157 temp.push_back(wstr[prev + 1]); | |
158 } | 166 } |
159 } | 167 } |
160 file_name->swap(temp); | |
161 #elif defined(WCHAR_T_IS_UTF32) | |
162 while (i < length) { | |
163 if (Singleton<IllegalCharacters>()->contains(wstr[i])) { | |
164 (*file_name)[i] = replace_char; | |
165 } | |
166 ++i; | |
167 } | |
168 #else | |
169 #error wchar_t* should be either UTF-16 or UTF-32 | |
170 #endif | |
171 } | 168 } |
172 | 169 |
173 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 170 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
174 #if defined(OS_WIN) | 171 #if defined(OS_WIN) |
175 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), | 172 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), |
176 b.value().c_str()) < 0; | 173 b.value().c_str()) < 0; |
177 | 174 |
178 #elif defined(OS_POSIX) | 175 #elif defined(OS_POSIX) |
179 // On linux, the file system encoding is not defined. We assume | 176 // On linux, the file system encoding is not defined. We assume |
180 // SysNativeMBToWide takes care of it. | 177 // SysNativeMBToWide takes care of it. |
181 // | 178 // |
182 // ICU's collator can take strings in OS native encoding. But we convert the | 179 // ICU's collator can take strings in OS native encoding. But we convert the |
183 // strings to UTF-16 ourselves to ensure conversion consistency. | 180 // strings to UTF-16 ourselves to ensure conversion consistency. |
184 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? | 181 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? |
185 return Singleton<LocaleAwareComparator>()->Compare( | 182 return Singleton<LocaleAwareComparator>()->Compare( |
186 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), | 183 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), |
187 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; | 184 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; |
188 #else | 185 #else |
189 #error Not implemented on your system | 186 #error Not implemented on your system |
190 #endif | 187 #endif |
191 } | 188 } |
192 | 189 |
193 } // namespace | 190 } // namespace |
OLD | NEW |