| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // File utilities that use the ICU library go in this file. | 5 // File utilities that use the ICU library go in this file. |
| 6 | 6 |
| 7 #include "base/i18n/file_util_icu.h" | 7 #include "base/i18n/file_util_icu.h" |
| 8 | 8 |
| 9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "base/scoped_ptr.h" | 11 #include "base/scoped_ptr.h" |
| 12 #include "base/singleton.h" | 12 #include "base/singleton.h" |
| 13 #include "base/string_util.h" | 13 #include "base/string_util.h" |
| 14 #include "base/utf_string_conversions.h" | 14 #include "base/utf_string_conversions.h" |
| 15 #include "base/sys_string_conversions.h" | 15 #include "base/sys_string_conversions.h" |
| 16 #include "build/build_config.h" | 16 #include "build/build_config.h" |
| 17 #include "unicode/coll.h" | 17 #include "unicode/coll.h" |
| 18 #include "unicode/uniset.h" | 18 #include "unicode/uniset.h" |
| 19 | 19 |
| 20 namespace { | 20 namespace { |
| 21 | 21 |
| 22 class IllegalCharacters { | 22 class IllegalCharacters { |
| 23 public: | 23 public: |
| 24 static IllegalCharacters* GetInstance() { |
| 25 return Singleton<IllegalCharacters>::get(); |
| 26 } |
| 27 |
| 24 bool contains(UChar32 ucs4) { | 28 bool contains(UChar32 ucs4) { |
| 25 return !!set->contains(ucs4); | 29 return !!set->contains(ucs4); |
| 26 } | 30 } |
| 27 | 31 |
| 28 bool containsNone(const string16 &s) { | 32 bool containsNone(const string16 &s) { |
| 29 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); | 33 return !!set->containsNone(icu::UnicodeString(s.c_str(), s.size())); |
| 30 } | 34 } |
| 31 | 35 |
| 32 private: | 36 private: |
| 33 friend class Singleton<IllegalCharacters>; | 37 friend class Singleton<IllegalCharacters>; |
| (...skipping 35 matching lines...) | (...skipping 35 matching lines...) |
| 69 set->add(0xFDD0, 0xFDEF); | 73 set->add(0xFDD0, 0xFDEF); |
| 70 for (int i = 0; i <= 0x10; ++i) { | 74 for (int i = 0; i <= 0x10; ++i) { |
| 71 int plane_base = 0x10000 * i; | 75 int plane_base = 0x10000 * i; |
| 72 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 76 set->add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
| 73 } | 77 } |
| 74 set->freeze(); | 78 set->freeze(); |
| 75 } | 79 } |
| 76 | 80 |
| 77 class LocaleAwareComparator { | 81 class LocaleAwareComparator { |
| 78 public: | 82 public: |
| 79 LocaleAwareComparator() { | 83 static LocaleAwareComparator* GetInstance() { |
| 80 UErrorCode error_code = U_ZERO_ERROR; | 84 return Singleton<LocaleAwareComparator>::get(); |
| 81 // Use the default collator. The default locale should have been properly | |
| 82 // set by the time this constructor is called. | |
| 83 collator_.reset(icu::Collator::createInstance(error_code)); | |
| 84 DCHECK(U_SUCCESS(error_code)); | |
| 85 // Make it case-sensitive. | |
| 86 collator_->setStrength(icu::Collator::TERTIARY); | |
| 87 // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we | |
| 88 // do not pay performance penalty to guarantee sort order correctness for | |
| 89 // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a | |
| 90 // reasonable tradeoff because such file names should be rare and the sort | |
| 91 // order doesn't change much anyway. | |
| 92 } | 85 } |
| 93 | 86 |
| 94 // Note: A similar function is available in l10n_util. | 87 // Note: A similar function is available in l10n_util. |
| 95 // We cannot use it because base should not depend on l10n_util. | 88 // We cannot use it because base should not depend on l10n_util. |
| 96 // TODO(yuzo): Move some of l10n_util to base. | 89 // TODO(yuzo): Move some of l10n_util to base. |
| 97 int Compare(const string16& a, const string16& b) { | 90 int Compare(const string16& a, const string16& b) { |
| 98 // We are not sure if Collator::compare is thread-safe. | 91 // We are not sure if Collator::compare is thread-safe. |
| 99 // Use an AutoLock just in case. | 92 // Use an AutoLock just in case. |
| 100 AutoLock auto_lock(lock_); | 93 AutoLock auto_lock(lock_); |
| 101 | 94 |
| 102 UErrorCode error_code = U_ZERO_ERROR; | 95 UErrorCode error_code = U_ZERO_ERROR; |
| 103 UCollationResult result = collator_->compare( | 96 UCollationResult result = collator_->compare( |
| 104 static_cast<const UChar*>(a.c_str()), | 97 static_cast<const UChar*>(a.c_str()), |
| 105 static_cast<int>(a.length()), | 98 static_cast<int>(a.length()), |
| 106 static_cast<const UChar*>(b.c_str()), | 99 static_cast<const UChar*>(b.c_str()), |
| 107 static_cast<int>(b.length()), | 100 static_cast<int>(b.length()), |
| 108 error_code); | 101 error_code); |
| 109 DCHECK(U_SUCCESS(error_code)); | 102 DCHECK(U_SUCCESS(error_code)); |
| 110 return result; | 103 return result; |
| 111 } | 104 } |
| 112 | 105 |
| 113 private: | 106 private: |
| 107 LocaleAwareComparator() { |
| 108 UErrorCode error_code = U_ZERO_ERROR; |
| 109 // Use the default collator. The default locale should have been properly |
| 110 // set by the time this constructor is called. |
| 111 collator_.reset(icu::Collator::createInstance(error_code)); |
| 112 DCHECK(U_SUCCESS(error_code)); |
| 113 // Make it case-sensitive. |
| 114 collator_->setStrength(icu::Collator::TERTIARY); |
| 115 // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we |
| 116 // do not pay performance penalty to guarantee sort order correctness for |
| 117 // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a |
| 118 // reasonable tradeoff because such file names should be rare and the sort |
| 119 // order doesn't change much anyway. |
| 120 } |
| 121 |
| 114 scoped_ptr<icu::Collator> collator_; | 122 scoped_ptr<icu::Collator> collator_; |
| 115 Lock lock_; | 123 Lock lock_; |
| 116 friend struct DefaultSingletonTraits<LocaleAwareComparator>; | 124 friend struct DefaultSingletonTraits<LocaleAwareComparator>; |
| 117 | 125 |
| 118 DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); | 126 DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); |
| 119 }; | 127 }; |
| 120 | 128 |
| 121 } // namespace | 129 } // namespace |
| 122 | 130 |
| 123 namespace file_util { | 131 namespace file_util { |
| 124 | 132 |
| 125 bool IsFilenameLegal(const string16& file_name) { | 133 bool IsFilenameLegal(const string16& file_name) { |
| 126 return Singleton<IllegalCharacters>()->containsNone(file_name); | 134 return IllegalCharacters::GetInstance()->containsNone(file_name); |
| 127 } | 135 } |
| 128 | 136 |
| 129 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 137 void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, |
| 130 char replace_char) { | 138 char replace_char) { |
| 131 DCHECK(file_name); | 139 DCHECK(file_name); |
| 132 | 140 |
| 133 DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); | 141 DCHECK(!(IllegalCharacters::GetInstance()->contains(replace_char))); |
| 134 | 142 |
| 135 // Remove leading and trailing whitespace. | 143 // Remove leading and trailing whitespace. |
| 136 TrimWhitespace(*file_name, TRIM_ALL, file_name); | 144 TrimWhitespace(*file_name, TRIM_ALL, file_name); |
| 137 | 145 |
| 138 IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); | 146 IllegalCharacters* illegal = IllegalCharacters::GetInstance(); |
| 139 int cursor = 0; // The ICU macros expect an int. | 147 int cursor = 0; // The ICU macros expect an int. |
| 140 while (cursor < static_cast<int>(file_name->size())) { | 148 while (cursor < static_cast<int>(file_name->size())) { |
| 141 int char_begin = cursor; | 149 int char_begin = cursor; |
| 142 uint32 code_point; | 150 uint32 code_point; |
| 143 #if defined(OS_MACOSX) | 151 #if defined(OS_MACOSX) |
| 144 // Mac uses UTF-8 encoding for filenames. | 152 // Mac uses UTF-8 encoding for filenames. |
| 145 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 153 U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), |
| 146 code_point); | 154 code_point); |
| 147 #elif defined(OS_WIN) | 155 #elif defined(OS_WIN) |
| 148 // Windows uses UTF-16 encoding for filenames. | 156 // Windows uses UTF-16 encoding for filenames. |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 164 // We just made the potentially multi-byte/word char into one that only | 172 // We just made the potentially multi-byte/word char into one that only |
| 165 // takes one byte/word, so need to adjust the cursor to point to the next | 173 // takes one byte/word, so need to adjust the cursor to point to the next |
| 166 // character again. | 174 // character again. |
| 167 cursor = char_begin + 1; | 175 cursor = char_begin + 1; |
| 168 } | 176 } |
| 169 } | 177 } |
| 170 } | 178 } |
| 171 | 179 |
| 172 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 180 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { |
| 173 #if defined(OS_WIN) | 181 #if defined(OS_WIN) |
| 174 return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), | 182 return LocaleAwareComparator::GetInstance()->Compare(a.value().c_str(), |
| 175 b.value().c_str()) < 0; | 183 b.value().c_str()) < 0; |
| 176 | 184 |
| 177 #elif defined(OS_POSIX) | 185 #elif defined(OS_POSIX) |
| 178 // On linux, the file system encoding is not defined. We assume | 186 // On linux, the file system encoding is not defined. We assume |
| 179 // SysNativeMBToWide takes care of it. | 187 // SysNativeMBToWide takes care of it. |
| 180 // | 188 // |
| 181 // ICU's collator can take strings in OS native encoding. But we convert the | 189 // ICU's collator can take strings in OS native encoding. But we convert the |
| 182 // strings to UTF-16 ourselves to ensure conversion consistency. | 190 // strings to UTF-16 ourselves to ensure conversion consistency. |
| 183 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? | 191 // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? |
| 184 return Singleton<LocaleAwareComparator>()->Compare( | 192 return LocaleAwareComparator::GetInstance()->Compare( |
| 185 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), | 193 WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), |
| 186 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; | 194 WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; |
| 187 #else | 195 #else |
| 188 #error Not implemented on your system | 196 #error Not implemented on your system |
| 189 #endif | 197 #endif |
| 190 } | 198 } |
| 191 | 199 |
| 192 } // namespace | 200 } // namespace |
| OLD | NEW |