OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // File utilities that use the ICU library go in this file. Functions using ICU | 5 // File utilities that use the ICU library go in this file. |
6 // are separated from the other functions to prevent ICU being pulled in by the | |
7 // linker if there is a false dependency. | |
8 // | |
9 // (The VS2005 linker finds such a false dependency and adds ~300K of ICU to | |
10 // chrome.exe if this code lives in file_util.cc, even though none of this code | |
11 // is called.) | |
12 | 6 |
13 #include "base/file_util.h" | 7 #include "base/i18n/file_util_icu.h" |
14 | 8 |
| 9 #include "base/file_path.h" |
| 10 #include "base/scoped_ptr.h" |
15 #include "base/singleton.h" | 11 #include "base/singleton.h" |
16 #include "base/string_util.h" | 12 #include "base/string_util.h" |
| 13 #include "base/sys_string_conversions.h" |
| 14 #include "build/build_config.h" |
| 15 #include "unicode/coll.h" |
17 #include "unicode/uniset.h" | 16 #include "unicode/uniset.h" |
18 | 17 |
19 namespace { | 18 namespace { |
| 19 |
20 class IllegalCharacters { | 20 class IllegalCharacters { |
21 public: | 21 public: |
22 bool contains(UChar32 ucs4) { | 22 bool contains(UChar32 ucs4) { |
23 return !!set->contains(ucs4); | 23 return !!set->contains(ucs4); |
24 } | 24 } |
25 | 25 |
26 bool containsNone(const string16 &s) { | 26 bool containsNone(const string16 &s) { |
27 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 27 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); |
28 } | 28 } |
29 | 29 |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
65 // to check |(ucs4 & 0xFFFEu) == 0xFFFEu|, in addition to calling | 65 // to check |(ucs4 & 0xFFFEu) == 0xFFFEu|, in addition to calling |
66 // containsNone(). | 66 // containsNone(). |
67 set->add(0xFDD0, 0xFDEF); | 67 set->add(0xFDD0, 0xFDEF); |
68 for (int i = 0; i <= 0x10; ++i) { | 68 for (int i = 0; i <= 0x10; ++i) { |
69 int plane_base = 0x10000 * i; | 69 int plane_base = 0x10000 * i; |
70 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 70 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
71 } | 71 } |
72 set->freeze(); | 72 set->freeze(); |
73 } | 73 } |
74 | 74 |
| 75 class LocaleAwareComparator { |
| 76 public: |
| 77 LocaleAwareComparator() { |
| 78 UErrorCode error_code = U_ZERO_ERROR; |
| 79 // Use the default collator. The default locale should have been properly |
| 80 // set by the time this constructor is called. |
| 81 collator_.reset(icu::Collator::createInstance(error_code)); |
| 82 DCHECK(U_SUCCESS(error_code)); |
| 83 // Make it case-sensitive. |
| 84 collator_->setStrength(icu::Collator::TERTIARY); |
| 85 // Note: We do not set the UCOL_NORMALIZATION_MODE attribute. In other words, we |
| 86 // do not pay the performance penalty to guarantee sort order correctness for |
| 87 // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a |
| 88 // reasonable tradeoff because such file names should be rare and the sort |
| 89 // order doesn't change much anyway. |
| 90 } |
| 91 |
| 92 // Note: A similar function is available in l10n_util. |
| 93 // We cannot use it because base should not depend on l10n_util. |
| 94 // TODO(yuzo): Move some of l10n_util to base. |
| 95 int Compare(const string16& a, const string16& b) { |
| 96 // We are not sure if Collator::compare is thread-safe. |
| 97 // Use an AutoLock just in case. |
| 98 AutoLock auto_lock(lock_); |
| 99 |
| 100 UErrorCode error_code = U_ZERO_ERROR; |
| 101 UCollationResult result = collator_->compare( |
| 102 static_cast<const UChar*>(a.c_str()), |
| 103 static_cast<int>(a.length()), |
| 104 static_cast<const UChar*>(b.c_str()), |
| 105 static_cast<int>(b.length()), |
| 106 error_code); |
| 107 DCHECK(U_SUCCESS(error_code)); |
| 108 return result; |
| 109 } |
| 110 |
| 111 private: |
| 112 scoped_ptr<icu::Collator> collator_; |
| 113 Lock lock_; |
| 114 friend struct DefaultSingletonTraits<LocaleAwareComparator>; |
| 115 |
| 116 DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); |
| 117 }; |
| 118 |
75 } // namespace | 119 } // namespace |
76 | 120 |
77 namespace file_util { | 121 namespace file_util { |
78 | 122 |
79 bool IsFilenameLegal(const string16& file_name) { | 123 bool IsFilenameLegal(const string16& file_name) { |
80 return Singleton<IllegalCharacters>()->containsNone(file_name); | 124 return Singleton<IllegalCharacters>()->containsNone(file_name); |
81 } | 125 } |
82 | 126 |
83 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { | 127 void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { |
84 DCHECK(file_name); | 128 DCHECK(file_name); |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
119 if (Singleton<IllegalCharacters>()->contains(wstr[i])) { | 163 if (Singleton<IllegalCharacters>()->contains(wstr[i])) { |
120 (*file_name)[i] = replace_char; | 164 (*file_name)[i] = replace_char; |
121 } | 165 } |
122 ++i; | 166 ++i; |
123 } | 167 } |
124 #else | 168 #else |
125 #error wchar_t* should be either UTF-16 or UTF-32 | 169 #error wchar_t* should be either UTF-16 or UTF-32 |
126 #endif | 170 #endif |
127 } | 171 } |
128 | 172 |
| 173 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
| 174 #if defined(OS_WIN) |
| 175 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), |
| 176 b.value().c_str()) < 0; |
| 177 |
| 178 #elif defined(OS_POSIX) |
| 179 // On Linux, the file system encoding is not defined. We assume |
| 180 // SysNativeMBToWide takes care of it. |
| 181 // |
| 182 // ICU's collator can take strings in OS native encoding. But we convert the |
| 183 // strings to UTF-16 ourselves to ensure conversion consistency. |
| 184 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? |
| 185 return Singleton<LocaleAwareComparator>()->Compare( |
| 186 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), |
| 187 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; |
| 188 #else |
| 189 #error Not implemented on your system |
| 190 #endif |
| 191 } |
| 192 |
129 } // namespace file_util | 193 } // namespace file_util |
OLD | NEW |