| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This file contains utility functions for dealing with localized | |
| 6 // content. | |
| 7 | |
| 8 #ifndef CHROME_COMMON_L10N_UTIL_H_ | |
| 9 #define CHROME_COMMON_L10N_UTIL_H_ | |
| 10 | |
| 11 #include "build/build_config.h" | |
| 12 | |
| 13 #include <algorithm> | |
| 14 #include <functional> | |
| 15 #include <string> | |
| 16 #include <vector> | |
| 17 | |
| 18 #include "base/basictypes.h" | |
| 19 #include "base/logging.h" | |
| 20 #include "base/scoped_ptr.h" | |
| 21 #include "base/string16.h" | |
| 22 #include "base/string_util.h" | |
| 23 #include "unicode/coll.h" | |
| 24 #include "unicode/locid.h" | |
| 25 #include "unicode/rbbi.h" | |
| 26 #include "unicode/ubidi.h" | |
| 27 #include "unicode/uchar.h" | |
| 28 | |
| 29 class FilePath; | |
| 30 class PrefService; | |
| 31 | |
| 32 namespace l10n_util { | |
| 33 | |
| 34 const char16 kRightToLeftMark = 0x200f; | |
| 35 const char16 kLeftToRightMark = 0x200e; | |
| 36 const char16 kLeftToRightEmbeddingMark = 0x202A; | |
| 37 const char16 kRightToLeftEmbeddingMark = 0x202B; | |
| 38 const char16 kPopDirectionalFormatting = 0x202C; | |
| 39 | |
| 40 // This method is responsible for determining the locale as defined below. In | |
| 41 // nearly all cases you shouldn't call this, rather use GetApplicationLocale | |
| 42 // defined on browser_process. | |
| 43 // | |
| 44 // Returns the locale used by the Application. First we use the value from the | |
| 45 // command line (--lang), second we try the value in the prefs file (passed in | |
| 46 // as |pref_locale|), finally, we fall back on the system locale. We only return | |
| 47 // a value if there's a corresponding resource DLL for the locale. Otherwise, | |
| 48 // we fall back to en-us. | |
| 49 std::wstring GetApplicationLocale(const std::wstring& pref_locale); | |
| 50 | |
| 51 // Given a locale code, return true if the OS is capable of supporting it. | |
| 52 // For instance, Oriya is not well supported on Windows XP and we return | |
| 53 // false for "or". | |
| 54 bool IsLocaleSupportedByOS(const std::wstring& locale); | |
| 55 | |
| 56 // This method returns the Local Name of the Locale Code. For example, for | |
| 57 // |locale_code_str| = "en-US", it returns "English (United States)". | |
| 58 // |app_locale_wstr| can be obtained in the UI thread - for example: | |
| 59 // const std::wstring app_locale_wstr = g_browser_process-> | |
| 60 // GetApplicationLocale(); | |
| 61 // If |is_for_ui| is true, U+200F is appended so that it can be | |
| 62 // rendered properly in a RTL Chrome. | |
| 63 std::wstring GetLocalName(const std::string& locale_code_str, | |
| 64 const std::wstring& app_locale_wstr, | |
| 65 bool is_for_ui); | |
| 66 | |
| 67 // Pulls resource string from the string bundle and returns it. | |
| 68 std::wstring GetString(int message_id); | |
| 69 std::string GetStringUTF8(int message_id); | |
| 70 | |
| 71 // Get a resource string and replace $1, $2 and $3 with |a|, |b| and |c| | |
| 72 // respectively. Additionally, $$ is replaced by $. | |
| 73 std::wstring GetStringF(int message_id, | |
| 74 const std::wstring& a); | |
| 75 std::wstring GetStringF(int message_id, | |
| 76 const std::wstring& a, | |
| 77 const std::wstring& b); | |
| 78 std::wstring GetStringF(int message_id, | |
| 79 const std::wstring& a, | |
| 80 const std::wstring& b, | |
| 81 const std::wstring& c); | |
| 82 std::string GetStringFUTF8(int message_id, | |
| 83 const string16& a); | |
| 84 std::string GetStringFUTF8(int message_id, | |
| 85 const string16& a, | |
| 86 const string16& b); | |
| 87 std::string GetStringFUTF8(int message_id, | |
| 88 const string16& a, | |
| 89 const string16& b, | |
| 90 const string16& c); | |
| 91 | |
| 92 // Variants that return the offset(s) of the replaced parameters. The | |
| 93 // vector based version returns offsets ordered by parameter. For example if | |
| 94 // invoked with a and b offsets[0] gives the offset for a and offsets[1] the | |
| 95 // offset of b regardless of where the parameters end up in the string. | |
| 96 std::wstring GetStringF(int message_id, | |
| 97 const std::wstring& a, | |
| 98 size_t* offset); | |
| 99 std::wstring GetStringF(int message_id, | |
| 100 const std::wstring& a, | |
| 101 const std::wstring& b, | |
| 102 std::vector<size_t>* offsets); | |
| 103 | |
| 104 // Convenience formatters for a single number. | |
| 105 std::wstring GetStringF(int message_id, int a); | |
| 106 std::wstring GetStringF(int message_id, int64 a); | |
| 107 | |
| 108 // Truncates the string to length characters. This breaks the string at | |
| 109 // the first word break before length, adding the horizontal ellipsis | |
| 110 // character (unicode character 0x2026) to render ... | |
| 111 // The supplied string is returned if the string has length characters or | |
| 112 // less. | |
| 113 std::wstring TruncateString(const std::wstring& string, size_t length); | |
| 114 | |
| 115 // Returns the lower case equivalent of string. | |
| 116 std::wstring ToLower(const std::wstring& string); | |
| 117 | |
// The set of text directions reported by the GetTextDirection() function.
enum TextDirection {
  UNKNOWN_DIRECTION = 0,  // Direction is unknown, or an error occurred.
  RIGHT_TO_LEFT = 1,
  LEFT_TO_RIGHT = 2,
};
| 124 | |
| 125 // Returns the text direction for the default ICU locale. It is assumed | |
| 126 // that SetICUDefaultLocale has been called to set the default locale to | |
| 127 // the UI locale of Chrome. Its return is one of the following three: | |
| 128 // * LEFT_TO_RIGHT: Left-To-Right (e.g. English, Chinese, etc.); | |
| 129 // * RIGHT_TO_LEFT: Right-To-Left (e.g. Arabic, Hebrew, etc.), and; | |
| 130 // * UNKNOWN_DIRECTION: unknown (or error). | |
| 131 TextDirection GetTextDirection(); | |
| 132 | |
| 133 // Returns the text direction for |locale_name|. | |
| 134 TextDirection GetTextDirectionForLocale(const char* locale_name); | |
| 135 | |
| 136 // Given the string in |text|, returns the directionality of the first | |
| 137 // character with strong directionality in the string. If no character in the | |
| 138 // text has strong directionality, LEFT_TO_RIGHT is returned. The Bidi | |
| 139 // character types L, LRE, LRO, R, AL, RLE, and RLO are considered as strong | |
| 140 // directionality characters. Please refer to http://unicode.org/reports/tr9/ | |
| 141 // for more information. | |
| 142 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text); | |
| 143 | |
| 144 // Given the string in |text|, this function creates a copy of the string with | |
| 145 // the appropriate Unicode formatting marks that mark the string direction | |
| 146 // (either left-to-right or right-to-left). The new string is returned in | |
| 147 // |localized_text|. The function checks both the current locale and the | |
| 148 // contents of the string in order to determine the direction of the returned | |
| 149 // string. The function returns true if the string in |text| was properly | |
| 150 // adjusted. | |
| 151 // | |
| 152 // Certain LTR strings are not rendered correctly when the context is RTL. For | |
| 153 // example, the string "Foo!" will appear as "!Foo" if it is rendered as is in | |
| 154 // an RTL context. Calling this function will make sure the returned localized | |
| 155 // string is always treated as a right-to-left string. This is done by | |
| 156 // inserting certain Unicode formatting marks into the returned string. | |
| 157 // | |
| 158 // TODO(idana) bug# 1206120: this function adjusts the string in question only | |
| 159 // if the current locale is right-to-left. The function does not take care of | |
| 160 // the opposite case (an RTL string displayed in an LTR context) since | |
| 161 // adjusting the string involves inserting Unicode formatting characters that | |
| 162 // Windows does not handle well unless right-to-left language support is | |
| 163 // installed. Since the English version of Windows doesn't have right-to-left | |
| 164 // language support installed by default, inserting the direction Unicode mark | |
| 165 // results in Windows displaying squares. | |
| 166 bool AdjustStringForLocaleDirection(const std::wstring& text, | |
| 167 std::wstring* localized_text); | |
| 168 | |
| 169 // Returns true if the string contains at least one character with strong right | |
| 170 // to left directionality; that is, a character with either R or AL Unicode | |
| 171 // BiDi character type. | |
| 172 bool StringContainsStrongRTLChars(const std::wstring& text); | |
| 173 | |
| 174 // Wraps a string with an LRE-PDF pair which essentially marks the string as a | |
| 175 // Left-To-Right string. Doing this is useful in order to make sure LTR | |
| 176 // strings are rendered properly in an RTL context. | |
| 177 void WrapStringWithLTRFormatting(std::wstring* text); | |
| 178 | |
| 179 // Wraps a string with an RLE-PDF pair which essentially marks the string as a | |
| 180 // Right-To-Left string. Doing this is useful in order to make sure RTL | |
| 181 // strings are rendered properly in an LTR context. | |
| 182 void WrapStringWithRTLFormatting(std::wstring* text); | |
| 183 | |
| 184 // Wraps individual file path components to get them to display correctly in an | |
| 185 // RTL UI. All filepaths should be passed through this function before display | |
| 186 // in UI for RTL locales. | |
| 187 void WrapPathWithLTRFormatting(const FilePath& path, | |
| 188 string16* rtl_safe_path); | |
| 189 | |
| 190 // Returns the default text alignment to be used when drawing text on a | |
| 191 // ChromeCanvas based on the directionality of the system locale language. This | |
| 192 // function is used by ChromeCanvas::DrawStringInt when the text alignment is | |
| 193 // not specified. | |
| 194 // | |
| 195 // This function returns either ChromeCanvas::TEXT_ALIGN_LEFT or | |
| 196 // ChromeCanvas::TEXT_ALIGN_RIGHT. | |
| 197 int DefaultCanvasTextAlignment(); | |
| 198 | |
| 199 // Compares the two strings using the specified collator. | |
| 200 UCollationResult CompareStringWithCollator(const Collator* collator, | |
| 201 const std::wstring& lhs, | |
| 202 const std::wstring& rhs); | |
| 203 | |
| 204 // Used by SortStringsUsingMethod. Invokes a method on the objects passed to | |
| 205 // operator (), comparing the string results using a collator. | |
| 206 template <class T, class Method> | |
| 207 class StringMethodComparatorWithCollator : | |
| 208 public std::binary_function<const std::wstring&, | |
| 209 const std::wstring&, | |
| 210 bool> { | |
| 211 public: | |
| 212 StringMethodComparatorWithCollator(Collator* collator, Method method) | |
| 213 : collator_(collator), | |
| 214 method_(method) { } | |
| 215 | |
| 216 // Returns true if lhs preceeds rhs. | |
| 217 bool operator() (T* lhs_t, T* rhs_t) { | |
| 218 return CompareStringWithCollator(collator_, (lhs_t->*method_)(), | |
| 219 (rhs_t->*method_)()) == UCOL_LESS; | |
| 220 } | |
| 221 | |
| 222 private: | |
| 223 Collator* collator_; | |
| 224 Method method_; | |
| 225 }; | |
| 226 | |
// Used by SortStringsUsingMethod. Ordering functor for T* objects: invokes
// |method| on both operands and compares the two results with operator <.
//
//   T      - pointee type of the objects being ordered.
//   Method - pointer to a member function of T that takes no arguments and
//            whose result type supports operator <.
template <class T, class Method>
class StringMethodComparator : public std::binary_function<T*, T*, bool> {
 public:
  explicit StringMethodComparator(Method method) : method_(method) { }

  // Returns true if lhs precedes rhs. Const because comparing never modifies
  // the functor, which lets it be used through const references and copies.
  bool operator()(T* lhs_t, T* rhs_t) const {
    return (lhs_t->*method_)() < (rhs_t->*method_)();
  }

 private:
  Method method_;
};
| 244 | |
| 245 // Sorts the objects in |elements| using the method |method|, which must return | |
| 246 // a string. Sorting is done using a collator, unless a collator can not be | |
| 247 // found in which case the strings are sorted using the operator <. | |
| 248 template <class T, class Method> | |
| 249 void SortStringsUsingMethod(const std::wstring& locale, | |
| 250 std::vector<T*>* elements, | |
| 251 Method method) { | |
| 252 UErrorCode error = U_ZERO_ERROR; | |
| 253 Locale loc(WideToUTF8(locale).c_str()); | |
| 254 scoped_ptr<Collator> collator(Collator::createInstance(loc, error)); | |
| 255 if (U_FAILURE(error)) { | |
| 256 sort(elements->begin(), elements->end(), | |
| 257 StringMethodComparator<T,Method>(method)); | |
| 258 return; | |
| 259 } | |
| 260 | |
| 261 std::sort(elements->begin(), elements->end(), | |
| 262 StringMethodComparatorWithCollator<T,Method>(collator.get(), method)); | |
| 263 } | |
| 264 | |
| 265 // Compares two elements' string keys and returns true if the first element's | |
| 266 // string key is less than the second element's string key. The Element must | |
| 267 // have a method like the follow format to return the string key. | |
| 268 // const std::wstring& GetStringKey() const; | |
| 269 // This uses the locale specified in the constructor. | |
| 270 template <class Element> | |
| 271 class StringComparator : public std::binary_function<const Element&, | |
| 272 const Element&, | |
| 273 bool> { | |
| 274 public: | |
| 275 explicit StringComparator(Collator* collator) | |
| 276 : collator_(collator) { } | |
| 277 | |
| 278 // Returns true if lhs precedes rhs. | |
| 279 bool operator()(const Element& lhs, const Element& rhs) { | |
| 280 const std::wstring& lhs_string_key = lhs.GetStringKey(); | |
| 281 const std::wstring& rhs_string_key = rhs.GetStringKey(); | |
| 282 | |
| 283 return StringComparator<std::wstring>(collator_)(lhs_string_key, | |
| 284 rhs_string_key); | |
| 285 } | |
| 286 | |
| 287 private: | |
| 288 Collator* collator_; | |
| 289 }; | |
| 290 | |
| 291 // Specialization of operator() method for std::wstring version. | |
| 292 template <> | |
| 293 bool StringComparator<std::wstring>::operator()(const std::wstring& lhs, | |
| 294 const std::wstring& rhs); | |
| 295 | |
| 296 // In place sorting of |elements| of a vector according to the string key of | |
| 297 // each element in the vector by using collation rules for |locale|. | |
| 298 // |begin_index| points to the start position of elements in the vector which | |
| 299 // want to be sorted. |end_index| points to the end position of elements in the | |
| 300 // vector which want to be sorted | |
| 301 template <class Element> | |
| 302 void SortVectorWithStringKey(const std::wstring& locale, | |
| 303 std::vector<Element>* elements, | |
| 304 unsigned int begin_index, | |
| 305 unsigned int end_index, | |
| 306 bool needs_stable_sort) { | |
| 307 DCHECK(begin_index >= 0 && begin_index < end_index && | |
| 308 end_index <= static_cast<unsigned int>(elements->size())); | |
| 309 UErrorCode error = U_ZERO_ERROR; | |
| 310 Locale loc(WideToASCII(locale).c_str()); | |
| 311 scoped_ptr<Collator> collator(Collator::createInstance(loc, error)); | |
| 312 if (U_FAILURE(error)) | |
| 313 collator.reset(); | |
| 314 StringComparator<Element> c(collator.get()); | |
| 315 if (needs_stable_sort) { | |
| 316 stable_sort(elements->begin() + begin_index, | |
| 317 elements->begin() + end_index, | |
| 318 c); | |
| 319 } else { | |
| 320 sort(elements->begin() + begin_index, elements->begin() + end_index, c); | |
| 321 } | |
| 322 } | |
| 323 | |
// Convenience overload that sorts every element of |elements|, i.e. the index
// range [0, elements->size()).
template <class Element>
void SortVectorWithStringKey(const std::wstring& locale,
                             std::vector<Element>* elements,
                             bool needs_stable_sort) {
  const unsigned int element_count =
      static_cast<unsigned int>(elements->size());
  SortVectorWithStringKey<Element>(locale, elements, 0, element_count,
                                   needs_stable_sort);
}
| 331 | |
| 332 // In place sorting of strings using collation rules for |locale|. | |
| 333 // TODO(port): this should take string16. | |
| 334 void SortStrings(const std::wstring& locale, | |
| 335 std::vector<std::wstring>* strings); | |
| 336 | |
| 337 // Returns a vector of available locale codes. E.g., a vector containing | |
| 338 // en-US, es, fr, fi, pt-PT, pt-BR, etc. | |
| 339 const std::vector<std::string>& GetAvailableLocales(); | |
| 340 | |
| 341 // A simple wrapper class for the bidirectional iterator of ICU. | |
| 342 // This class uses the bidirectional iterator of ICU to split a line of | |
| 343 // bidirectional texts into visual runs in its display order. | |
| 344 class BiDiLineIterator { | |
| 345 public: | |
| 346 BiDiLineIterator() : bidi_(NULL) { } | |
| 347 ~BiDiLineIterator(); | |
| 348 | |
| 349 // Initializes the bidirectional iterator with the specified text. Returns | |
| 350 // whether initialization succeeded. | |
| 351 UBool Open(const std::wstring& text, bool right_to_left, bool url); | |
| 352 | |
| 353 // Returns the number of visual runs in the text, or zero on error. | |
| 354 int CountRuns(); | |
| 355 | |
| 356 // Gets the logical offset, length, and direction of the specified visual run. | |
| 357 UBiDiDirection GetVisualRun(int index, int* start, int* length); | |
| 358 | |
| 359 // Given a start position, figure out where the run ends (and the BiDiLevel). | |
| 360 void GetLogicalRun(int start, int* end, UBiDiLevel* level); | |
| 361 | |
| 362 private: | |
| 363 UBiDi* bidi_; | |
| 364 | |
| 365 DISALLOW_COPY_AND_ASSIGN(BiDiLineIterator); | |
| 366 }; | |
| 367 | |
| 368 } | |
| 369 | |
| 370 #endif // CHROME_COMMON_L10N_UTIL_H_ | |
| OLD | NEW |