OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
6 | 6 |
7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
8 | 8 |
9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/scoped_ptr.h" | 11 #include "base/scoped_ptr.h" |
12 #include "base/singleton.h" | 12 #include "base/singleton.h" |
13 #include "base/string_util.h" | 13 #include "base/string_util.h" |
14 #include "base/utf_string_conversions.h" | 14 #include "base/utf_string_conversions.h" |
15 #include "base/sys_string_conversions.h" | 15 #include "base/sys_string_conversions.h" |
16 #include "build/build_config.h" | 16 #include "build/build_config.h" |
17 #include "unicode/coll.h" | 17 #include "unicode/coll.h" |
18 #include "unicode/uniset.h" | 18 #include "unicode/uniset.h" |
19 | 19 |
20 namespace { | 20 namespace { |
21 | 21 |
22 class IllegalCharacters { | 22 class IllegalCharacters { |
23 public: | 23 public: |
| 24 static IllegalCharacters* GetInstance() { |
| 25 return Singleton<IllegalCharacters>::get(); |
| 26 } |
| 27 |
24 bool contains(UChar32 ucs4) { | 28 bool contains(UChar32 ucs4) { |
25 return !!set->contains(ucs4); | 29 return !!set->contains(ucs4); |
26 } | 30 } |
27 | 31 |
28 bool containsNone(const string16 &s) { | 32 bool containsNone(const string16 &s) { |
29 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 33 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); |
30 } | 34 } |
31 | 35 |
32 private: | 36 private: |
33 friend class Singleton<IllegalCharacters>; | 37 friend class Singleton<IllegalCharacters>; |
(...skipping 35 matching lines...) |
69 set->add(0xFDD0, 0xFDEF); | 73 set->add(0xFDD0, 0xFDEF); |
70 for (int i = 0; i <= 0x10; ++i) { | 74 for (int i = 0; i <= 0x10; ++i) { |
71 int plane_base = 0x10000 * i; | 75 int plane_base = 0x10000 * i; |
72 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 76 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
73 } | 77 } |
74 set->freeze(); | 78 set->freeze(); |
75 } | 79 } |
76 | 80 |
77 class LocaleAwareComparator { | 81 class LocaleAwareComparator { |
78 public: | 82 public: |
79 LocaleAwareComparator() { | 83 static LocaleAwareComparator* GetInstance() { |
80 UErrorCode error_code = U_ZERO_ERROR; | 84 return Singleton<LocaleAwareComparator>::get(); |
81 // Use the default collator. The default locale should have been properly | |
82 // set by the time this constructor is called. | |
83 collator_.reset(icu::Collator::createInstance(error_code)); | |
84 DCHECK(U_SUCCESS(error_code)); | |
85 // Make it case-sensitive. | |
86 collator_->setStrength(icu::Collator::TERTIARY); | |
87 // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we | |
88 // do not pay performance penalty to guarantee sort order correctness for | |
89 // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a | |
90 // reasonable tradeoff because such file names should be rare and the sort | |
91 // order doesn't change much anyway. | |
92 } | 85 } |
93 | 86 |
94 // Note: A similar function is available in l10n_util. | 87 // Note: A similar function is available in l10n_util. |
95 // We cannot use it because base should not depend on l10n_util. | 88 // We cannot use it because base should not depend on l10n_util. |
96 // TODO(yuzo): Move some of l10n_util to base. | 89 // TODO(yuzo): Move some of l10n_util to base. |
97 int Compare(const string16& a, const string16& b) { | 90 int Compare(const string16& a, const string16& b) { |
98 // We are not sure if Collator::compare is thread-safe. | 91 // We are not sure if Collator::compare is thread-safe. |
99 // Use an AutoLock just in case. | 92 // Use an AutoLock just in case. |
100 AutoLock auto_lock(lock_); | 93 AutoLock auto_lock(lock_); |
101 | 94 |
102 UErrorCode error_code = U_ZERO_ERROR; | 95 UErrorCode error_code = U_ZERO_ERROR; |
103 UCollationResult result = collator_->compare( | 96 UCollationResult result = collator_->compare( |
104 static_cast<const UChar*>(a.c_str()), | 97 static_cast<const UChar*>(a.c_str()), |
105 static_cast<int>(a.length()), | 98 static_cast<int>(a.length()), |
106 static_cast<const UChar*>(b.c_str()), | 99 static_cast<const UChar*>(b.c_str()), |
107 static_cast<int>(b.length()), | 100 static_cast<int>(b.length()), |
108 error_code); | 101 error_code); |
109 DCHECK(U_SUCCESS(error_code)); | 102 DCHECK(U_SUCCESS(error_code)); |
110 return result; | 103 return result; |
111 } | 104 } |
112 | 105 |
113 private: | 106 private: |
| 107 LocaleAwareComparator() { |
| 108 UErrorCode error_code = U_ZERO_ERROR; |
| 109 // Use the default collator. The default locale should have been properly |
| 110 // set by the time this constructor is called. |
| 111 collator_.reset(icu::Collator::createInstance(error_code)); |
| 112 DCHECK(U_SUCCESS(error_code)); |
| 113 // Make it case-sensitive. |
| 114 collator_->setStrength(icu::Collator::TERTIARY); |
| 115 // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we |
| 116 // do not pay performance penalty to guarantee sort order correctness for |
| 117 // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a |
| 118 // reasonable tradeoff because such file names should be rare and the sort |
| 119 // order doesn't change much anyway. |
| 120 } |
| 121 |
114 scoped_ptr<icu::Collator> collator_; | 122 scoped_ptr<icu::Collator> collator_; |
115 Lock lock_; | 123 Lock lock_; |
116 friend struct DefaultSingletonTraits<LocaleAwareComparator>; | 124 friend struct DefaultSingletonTraits<LocaleAwareComparator>; |
117 | 125 |
118 DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); | 126 DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); |
119 }; | 127 }; |
120 | 128 |
121 } // namespace | 129 } // namespace |
122 | 130 |
123 namespace file_util { | 131 namespace file_util { |
124 | 132 |
125 bool IsFilenameLegal(const string16& file_name) { | 133 bool IsFilenameLegal(const string16& file_name) { |
126 return Singleton<IllegalCharacters>()->containsNone(file_name); | 134 return IllegalCharacters::GetInstance()->containsNone(file_name); |
127 } | 135 } |
128 | 136 |
129 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 137 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
130 char replace_char) { | 138 char replace_char) { |
131 DCHECK(file_name); | 139 DCHECK(file_name); |
132 | 140 |
133 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); | 141 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); |
134 | 142 |
135 // Remove leading and trailing whitespace. | 143 // Remove leading and trailing whitespace. |
136 TrimWhitespace(*file_name, TRIM_ALL, file_name); | 144 TrimWhitespace(*file_name, TRIM_ALL, file_name); |
137 | 145 |
138 IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); | 146 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); |
139 int cursor = 0; // The ICU macros expect an int. | 147 int cursor = 0; // The ICU macros expect an int. |
140 while (cursor < static_cast<int>(file_name->size())) { | 148 while (cursor < static_cast<int>(file_name->size())) { |
141 int char_begin = cursor; | 149 int char_begin = cursor; |
142 uint32 code_point; | 150 uint32 code_point; |
143 #if defined(OS_MACOSX) | 151 #if defined(OS_MACOSX) |
144 // Mac uses UTF-8 encoding for filenames. | 152 // Mac uses UTF-8 encoding for filenames. |
145 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 153 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
146 code_point); | 154 code_point); |
147 #elif defined(OS_WIN) | 155 #elif defined(OS_WIN) |
148 // Windows uses UTF-16 encoding for filenames. | 156 // Windows uses UTF-16 encoding for filenames. |
(...skipping 15 matching lines...) |
164 // We just made the potentially multi-byte/word char into one that only | 172 // We just made the potentially multi-byte/word char into one that only |
165 // takes one byte/word, so need to adjust the cursor to point to the next | 173 // takes one byte/word, so need to adjust the cursor to point to the next |
166 // character again. | 174 // character again. |
167 cursor = char_begin + 1; | 175 cursor = char_begin + 1; |
168 } | 176 } |
169 } | 177 } |
170 } | 178 } |
171 | 179 |
172 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 180 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
173 #if defined(OS_WIN) | 181 #if defined(OS_WIN) |
174 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), | 182 return LocaleAwareComparator::GetInstance()->Compare(a.value().c_str(), |
175 b.value().c_str()) < 0; | 183 b.value().c_str()) < 0; |
176 | 184 |
177 #elif defined(OS_POSIX) | 185 #elif defined(OS_POSIX) |
178 // On linux, the file system encoding is not defined. We assume | 186 // On linux, the file system encoding is not defined. We assume |
179 // SysNativeMBToWide takes care of it. | 187 // SysNativeMBToWide takes care of it. |
180 // | 188 // |
181 // ICU's collator can take strings in OS native encoding. But we convert the | 189 // ICU's collator can take strings in OS native encoding. But we convert the |
182 // strings to UTF-16 ourselves to ensure conversion consistency. | 190 // strings to UTF-16 ourselves to ensure conversion consistency. |
183 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? | 191 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? |
184 return Singleton<LocaleAwareComparator>()->Compare( | 192 return LocaleAwareComparator::GetInstance()->Compare( |
185 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), | 193 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), |
186 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; | 194 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; |
187 #else | 195 #else |
188 #error Not implemented on your system | 196 #error Not implemented on your system |
189 #endif | 197 #endif |
190 } | 198 } |
191 | 199 |
192 } // namespace | 200 } // namespace |
OLD | NEW |