| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "build/build_config.h" | |
| 6 | |
| 7 #include "chrome/common/l10n_util.h" | |
| 8 | |
| 9 #include "app/resource_bundle.h" | |
| 10 #include "base/command_line.h" | |
| 11 #include "base/file_util.h" | |
| 12 #include "base/path_service.h" | |
| 13 #include "base/scoped_ptr.h" | |
| 14 #include "base/string16.h" | |
| 15 #include "base/string_piece.h" | |
| 16 #include "base/string_util.h" | |
| 17 #include "base/sys_string_conversions.h" | |
| 18 #include "chrome/common/chrome_paths.h" | |
| 19 #include "chrome/common/chrome_switches.h" | |
| 20 #include "chrome/common/gfx/chrome_canvas.h" | |
| 21 #include "unicode/uscript.h" | |
| 22 | |
| 23 // TODO(playmobil): remove this undef once SkPostConfig.h is fixed. | |
| 24 // skia/include/corecg/SkPostConfig.h #defines strcasecmp() so we can't use | |
| 25 // base::strcasecmp() without #undefing it here. | |
| 26 #undef strcasecmp | |
| 27 | |
| 28 namespace { | |
| 29 | |
// Extension of the per-locale data file that IsLocaleAvailable() probes for.
// Being inside the anonymous namespace already gives these constants
// internal linkage.
#if defined(OS_WIN)
const FilePath::CharType kLocaleFileExtension[] = L".dll";
#elif defined(OS_POSIX)
const FilePath::CharType kLocaleFileExtension[] = ".pak";
#endif

// Appended by TruncateString to strings that had to be shortened
// (U+2026 HORIZONTAL ELLIPSIS).
const wchar_t* const kElideString = L"\x2026";
| 38 | |
| 39 // Get language and region from the OS. | |
| 40 void GetLanguageAndRegionFromOS(std::string* lang, std::string* region) { | |
| 41 // Later we may have to change this to be OS-dependent so that | |
| 42 // it's not affected by ICU's default locale. It's all right | |
| 43 // to do this way because SetICUDefaultLocale is internal | |
| 44 // to this file and we know where/when it's called. | |
| 45 Locale locale = Locale::getDefault(); | |
| 46 const char* language = locale.getLanguage(); | |
| 47 const char* country = locale.getCountry(); | |
| 48 DCHECK(language); | |
| 49 *lang = language; | |
| 50 *region = country; | |
| 51 } | |
| 52 | |
| 53 // Convert Chrome locale name to ICU locale name | |
| 54 std::string ICULocaleName(const std::wstring& locale_string) { | |
| 55 // If not Spanish, just return it. | |
| 56 if (locale_string.substr(0, 2) != L"es") | |
| 57 return WideToASCII(locale_string); | |
| 58 // Expand es to es-ES. | |
| 59 if (LowerCaseEqualsASCII(locale_string, "es")) | |
| 60 return "es-ES"; | |
| 61 // Map es-419 (Latin American Spanish) to es-FOO depending on the system | |
| 62 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map | |
| 63 // to es-MX (the most populous in Spanish-speaking Latin America). | |
| 64 if (LowerCaseEqualsASCII(locale_string, "es-419")) { | |
| 65 std::string lang, region; | |
| 66 GetLanguageAndRegionFromOS(&lang, ®ion); | |
| 67 if (LowerCaseEqualsASCII(lang, "es") && | |
| 68 !LowerCaseEqualsASCII(region, "es")) { | |
| 69 lang.append("-"); | |
| 70 lang.append(region); | |
| 71 return lang; | |
| 72 } | |
| 73 return "es-MX"; | |
| 74 } | |
| 75 // Currently, Chrome has only "es" and "es-419", but later we may have | |
| 76 // more specific "es-RR". | |
| 77 return WideToASCII(locale_string); | |
| 78 } | |
| 79 | |
| 80 // Sets the default locale of ICU. | |
| 81 // When the application locale (UI locale) of Chrome is specified with | |
| 82 // '--lang' command line flag or 'intl.app_locale' entry in the "Preferences", | |
| 83 // the default locale of ICU need to be changed to match the application locale | |
| 84 // so that ICU functions work correctly in a locale-dependent manner. | |
| 85 // This is handy in that we don't have to call GetApplicationLocale() | |
| 86 // everytime we call locale-dependent ICU APIs as long as we make sure | |
| 87 // that this is called before any locale-dependent API is called. | |
| 88 UBool SetICUDefaultLocale(const std::wstring& locale_string) { | |
| 89 Locale locale(ICULocaleName(locale_string).c_str()); | |
| 90 UErrorCode error_code = U_ZERO_ERROR; | |
| 91 Locale::setDefault(locale, error_code); | |
| 92 // This return value is actually bogus because Locale object is | |
| 93 // an ID and setDefault seems to always succeed (regardless of the | |
| 94 // presence of actual locale data). However, | |
| 95 // it does not hurt to have it as a sanity check. | |
| 96 return U_SUCCESS(error_code); | |
| 97 } | |
| 98 | |
| 99 // Returns true if |locale_name| has an alias in the ICU data file. | |
| 100 bool IsDuplicateName(const std::string& locale_name) { | |
| 101 static const char* const kDuplicateNames[] = { | |
| 102 "en", | |
| 103 "pt", | |
| 104 "zh", | |
| 105 "zh_hans_cn", | |
| 106 "zh_hant_tw" | |
| 107 }; | |
| 108 | |
| 109 // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). | |
| 110 // 'es-419' (Spanish in Latin America) is not available in ICU so that it | |
| 111 // has to be added manually in GetAvailableLocales(). | |
| 112 if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) | |
| 113 return true; | |
| 114 for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { | |
| 115 if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) | |
| 116 return true; | |
| 117 } | |
| 118 return false; | |
| 119 } | |
| 120 | |
| 121 bool IsLocaleAvailable(const std::wstring& locale, | |
| 122 const std::wstring& locale_path) { | |
| 123 std::wstring test_locale = locale; | |
| 124 // If locale has any illegal characters in it, we don't want to try to | |
| 125 // load it because it may be pointing outside the locale data file directory. | |
| 126 file_util::ReplaceIllegalCharacters(&test_locale, ' '); | |
| 127 if (test_locale != locale) | |
| 128 return false; | |
| 129 | |
| 130 if (!l10n_util::IsLocaleSupportedByOS(locale)) | |
| 131 return false; | |
| 132 | |
| 133 FilePath test_path = FilePath::FromWStringHack(locale_path) | |
| 134 .Append(FilePath::FromWStringHack(locale)) | |
| 135 .ReplaceExtension(kLocaleFileExtension); | |
| 136 return file_util::PathExists(test_path) && SetICUDefaultLocale(locale); | |
| 137 } | |
| 138 | |
| 139 bool CheckAndResolveLocale(const std::wstring& locale, | |
| 140 const std::wstring& locale_path, | |
| 141 std::wstring* resolved_locale) { | |
| 142 if (IsLocaleAvailable(locale, locale_path)) { | |
| 143 *resolved_locale = locale; | |
| 144 return true; | |
| 145 } | |
| 146 // If the locale matches language but not country, use that instead. | |
| 147 // TODO(jungshik) : Nothing is done about languages that Chrome | |
| 148 // does not support but available on Windows. We fall | |
| 149 // back to en-US in GetApplicationLocale so that it's a not critical, | |
| 150 // but we can do better. | |
| 151 std::wstring::size_type hyphen_pos = locale.find(L'-'); | |
| 152 if (hyphen_pos != std::wstring::npos && hyphen_pos > 0) { | |
| 153 std::wstring lang(locale, 0, hyphen_pos); | |
| 154 std::wstring region(locale, hyphen_pos + 1); | |
| 155 std::wstring tmp_locale(lang); | |
| 156 // Map es-RR other than es-ES to es-419 (Chrome's Latin American | |
| 157 // Spanish locale). | |
| 158 if (LowerCaseEqualsASCII(lang, "es") && !LowerCaseEqualsASCII(region, "es")) | |
| 159 tmp_locale.append(L"-419"); | |
| 160 else if (LowerCaseEqualsASCII(lang, "zh")) { | |
| 161 // Map zh-HK and zh-MK to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. | |
| 162 if (LowerCaseEqualsASCII(region, "hk") || | |
| 163 LowerCaseEqualsASCII(region, "mk")) { | |
| 164 tmp_locale.append(L"-TW"); | |
| 165 } else { | |
| 166 tmp_locale.append(L"-CN"); | |
| 167 } | |
| 168 } | |
| 169 if (IsLocaleAvailable(tmp_locale, locale_path)) { | |
| 170 resolved_locale->swap(tmp_locale); | |
| 171 return true; | |
| 172 } | |
| 173 } | |
| 174 | |
| 175 // Google updater uses no, iw and en for our nb, he, and en-US. | |
| 176 // We need to map them to our codes. | |
| 177 struct { | |
| 178 const char* source; | |
| 179 const wchar_t* dest;} alias_map[] = { | |
| 180 {"no", L"nb"}, | |
| 181 {"tl", L"fil"}, | |
| 182 {"iw", L"he"}, | |
| 183 {"en", L"en-US"}, | |
| 184 }; | |
| 185 | |
| 186 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) { | |
| 187 if (LowerCaseEqualsASCII(locale, alias_map[i].source)) { | |
| 188 std::wstring tmp_locale(alias_map[i].dest); | |
| 189 if (IsLocaleAvailable(tmp_locale, locale_path)) { | |
| 190 resolved_locale->swap(tmp_locale); | |
| 191 return true; | |
| 192 } | |
| 193 } | |
| 194 } | |
| 195 | |
| 196 return false; | |
| 197 } | |
| 198 | |
| 199 // Get the locale of the operating system. The return value is of the form | |
| 200 // language[-country] (e.g., en-US) where the language is the 2 letter code from | |
| 201 // ISO-639. | |
| 202 std::wstring GetSystemLocale() { | |
| 203 std::string language, region; | |
| 204 GetLanguageAndRegionFromOS(&language, ®ion); | |
| 205 std::string ret; | |
| 206 if (!language.empty()) | |
| 207 ret.append(language); | |
| 208 if (!region.empty()) { | |
| 209 ret.append("-"); | |
| 210 ret.append(region); | |
| 211 } | |
| 212 return ASCIIToWide(ret); | |
| 213 } | |
| 214 | |
| 215 } // namespace | |
| 216 | |
| 217 namespace l10n_util { | |
| 218 | |
// Represents the locale-specific text direction. Starts out as
// UNKNOWN_DIRECTION and is filled in by the first call to
// GetTextDirection(), which then keeps returning the cached value.
static TextDirection g_text_direction = UNKNOWN_DIRECTION;
| 221 | |
| 222 std::wstring GetApplicationLocale(const std::wstring& pref_locale) { | |
| 223 #if defined(OS_MACOSX) | |
| 224 // On the mac, we don't want to test preferences or ICU for the language, | |
| 225 // we want to use whatever Cocoa is using when it loaded the main nib file. | |
| 226 // It handles all the mapping and fallbacks for us, we just need to ask | |
| 227 // Cocoa. | |
| 228 // TODO(pinkerton): break this out into a .mm and ask Cocoa. | |
| 229 return L"en"; | |
| 230 #else | |
| 231 FilePath locale_path; | |
| 232 PathService::Get(chrome::DIR_LOCALES, &locale_path); | |
| 233 std::wstring resolved_locale; | |
| 234 | |
| 235 // First, check to see if there's a --lang flag. | |
| 236 const CommandLine& parsed_command_line = *CommandLine::ForCurrentProcess(); | |
| 237 const std::wstring& lang_arg = | |
| 238 parsed_command_line.GetSwitchValue(switches::kLang); | |
| 239 if (!lang_arg.empty()) { | |
| 240 if (CheckAndResolveLocale(lang_arg, locale_path.ToWStringHack(), | |
| 241 &resolved_locale)) | |
| 242 return resolved_locale; | |
| 243 } | |
| 244 | |
| 245 // Second, try user prefs. | |
| 246 if (!pref_locale.empty()) { | |
| 247 if (CheckAndResolveLocale(pref_locale, locale_path.ToWStringHack(), | |
| 248 &resolved_locale)) | |
| 249 return resolved_locale; | |
| 250 } | |
| 251 | |
| 252 // Next, try the system locale. | |
| 253 const std::wstring system_locale = GetSystemLocale(); | |
| 254 if (CheckAndResolveLocale(system_locale, locale_path.ToWStringHack(), | |
| 255 &resolved_locale)) | |
| 256 return resolved_locale; | |
| 257 | |
| 258 // Fallback on en-US. | |
| 259 const std::wstring fallback_locale(L"en-US"); | |
| 260 if (IsLocaleAvailable(fallback_locale, locale_path.ToWStringHack())) | |
| 261 return fallback_locale; | |
| 262 | |
| 263 // No locale data file was found; we shouldn't get here. | |
| 264 NOTREACHED(); | |
| 265 | |
| 266 return std::wstring(); | |
| 267 #endif | |
| 268 } | |
| 269 | |
| 270 std::wstring GetLocalName(const std::string& locale_code_str, | |
| 271 const std::wstring& app_locale_wstr, | |
| 272 bool is_for_ui) { | |
| 273 const std::string app_locale = WideToASCII(app_locale_wstr); | |
| 274 const char* locale_code = locale_code_str.c_str(); | |
| 275 UErrorCode error = U_ZERO_ERROR; | |
| 276 const int buffer_size = 1024; | |
| 277 | |
| 278 #if defined(WCHAR_T_IS_UTF32) | |
| 279 string16 name_local_utf16; | |
| 280 int actual_size = uloc_getDisplayName(locale_code, app_locale.c_str(), | |
| 281 WriteInto(&name_local_utf16, buffer_size + 1), buffer_size, &error); | |
| 282 std::wstring name_local = UTF16ToWide(name_local_utf16); | |
| 283 #else | |
| 284 std::wstring name_local; | |
| 285 int actual_size = uloc_getDisplayName(locale_code, app_locale.c_str(), | |
| 286 WriteInto(&name_local, buffer_size + 1), buffer_size, &error); | |
| 287 #endif | |
| 288 DCHECK(U_SUCCESS(error)); | |
| 289 name_local.resize(actual_size); | |
| 290 // Add an RTL mark so parentheses are properly placed. | |
| 291 if (is_for_ui && GetTextDirection() == RIGHT_TO_LEFT) { | |
| 292 name_local.push_back(static_cast<wchar_t>(kRightToLeftMark)); | |
| 293 } | |
| 294 return name_local; | |
| 295 } | |
| 296 | |
| 297 std::wstring GetString(int message_id) { | |
| 298 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); | |
| 299 return UTF16ToWide(rb.GetLocalizedString(message_id)); | |
| 300 } | |
| 301 | |
| 302 std::string GetStringUTF8(int message_id) { | |
| 303 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); | |
| 304 return UTF16ToUTF8(rb.GetLocalizedString(message_id)); | |
| 305 } | |
| 306 | |
| 307 static string16 GetStringF(int message_id, | |
| 308 const string16& a, | |
| 309 const string16& b, | |
| 310 const string16& c, | |
| 311 const string16& d, | |
| 312 std::vector<size_t>* offsets) { | |
| 313 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); | |
| 314 const string16& format_string = rb.GetLocalizedString(message_id); | |
| 315 string16 formatted = ReplaceStringPlaceholders(format_string, a, b, c, d, | |
| 316 offsets); | |
| 317 return formatted; | |
| 318 } | |
| 319 | |
| 320 std::wstring GetStringF(int message_id, const std::wstring& a) { | |
| 321 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), string16(), | |
| 322 string16(), string16(), NULL)); | |
| 323 } | |
| 324 | |
| 325 std::wstring GetStringF(int message_id, | |
| 326 const std::wstring& a, | |
| 327 const std::wstring& b) { | |
| 328 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b), | |
| 329 string16(), string16(), NULL)); | |
| 330 } | |
| 331 | |
| 332 std::wstring GetStringF(int message_id, | |
| 333 const std::wstring& a, | |
| 334 const std::wstring& b, | |
| 335 const std::wstring& c) { | |
| 336 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b), | |
| 337 WideToUTF16(c), string16(), NULL)); | |
| 338 } | |
| 339 | |
| 340 std::string GetStringFUTF8(int message_id, | |
| 341 const string16& a) { | |
| 342 return UTF16ToUTF8(GetStringF(message_id, a, string16(), string16(), | |
| 343 string16(), NULL)); | |
| 344 } | |
| 345 | |
| 346 std::string GetStringFUTF8(int message_id, | |
| 347 const string16& a, | |
| 348 const string16& b) { | |
| 349 return UTF16ToUTF8(GetStringF(message_id, a, b, string16(), string16(), | |
| 350 NULL)); | |
| 351 } | |
| 352 | |
| 353 std::string GetStringFUTF8(int message_id, | |
| 354 const string16& a, | |
| 355 const string16& b, | |
| 356 const string16& c) { | |
| 357 return UTF16ToUTF8(GetStringF(message_id, a, b, c, string16(), NULL)); | |
| 358 } | |
| 359 | |
| 360 std::wstring GetStringF(int message_id, const std::wstring& a, size_t* offset) { | |
| 361 DCHECK(offset); | |
| 362 std::vector<size_t> offsets; | |
| 363 string16 result = GetStringF(message_id, WideToUTF16(a), string16(), | |
| 364 string16(), string16(), &offsets); | |
| 365 DCHECK(offsets.size() == 1); | |
| 366 *offset = offsets[0]; | |
| 367 return UTF16ToWide(result); | |
| 368 } | |
| 369 | |
| 370 std::wstring GetStringF(int message_id, | |
| 371 const std::wstring& a, | |
| 372 const std::wstring& b, | |
| 373 std::vector<size_t>* offsets) { | |
| 374 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b), | |
| 375 string16(), string16(), offsets)); | |
| 376 } | |
| 377 | |
| 378 std::wstring GetStringF(int message_id, int a) { | |
| 379 return GetStringF(message_id, IntToWString(a)); | |
| 380 } | |
| 381 | |
| 382 std::wstring GetStringF(int message_id, int64 a) { | |
| 383 return GetStringF(message_id, Int64ToWString(a)); | |
| 384 } | |
| 385 | |
| 386 std::wstring TruncateString(const std::wstring& string, size_t length) { | |
| 387 if (string.size() <= length) | |
| 388 // String fits, return it. | |
| 389 return string; | |
| 390 | |
| 391 if (length == 0) { | |
| 392 // No room for the ellide string, return an empty string. | |
| 393 return std::wstring(L""); | |
| 394 } | |
| 395 size_t max = length - 1; | |
| 396 | |
| 397 if (max == 0) { | |
| 398 // Just enough room for the elide string. | |
| 399 return kElideString; | |
| 400 } | |
| 401 | |
| 402 #if defined(WCHAR_T_IS_UTF32) | |
| 403 const string16 string_utf16 = WideToUTF16(string); | |
| 404 #else | |
| 405 const std::wstring &string_utf16 = string; | |
| 406 #endif | |
| 407 // Use a line iterator to find the first boundary. | |
| 408 UErrorCode status = U_ZERO_ERROR; | |
| 409 scoped_ptr<RuleBasedBreakIterator> bi(static_cast<RuleBasedBreakIterator*>( | |
| 410 RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), | |
| 411 status))); | |
| 412 if (U_FAILURE(status)) | |
| 413 return string.substr(0, max) + kElideString; | |
| 414 bi->setText(string_utf16.c_str()); | |
| 415 int32_t index = bi->preceding(static_cast<int32_t>(max)); | |
| 416 if (index == BreakIterator::DONE) { | |
| 417 index = static_cast<int32_t>(max); | |
| 418 } else { | |
| 419 // Found a valid break (may be the beginning of the string). Now use | |
| 420 // a character iterator to find the previous non-whitespace character. | |
| 421 StringCharacterIterator char_iterator(string_utf16.c_str()); | |
| 422 if (index == 0) { | |
| 423 // No valid line breaks. Start at the end again. This ensures we break | |
| 424 // on a valid character boundary. | |
| 425 index = static_cast<int32_t>(max); | |
| 426 } | |
| 427 char_iterator.setIndex(index); | |
| 428 while (char_iterator.hasPrevious()) { | |
| 429 char_iterator.previous(); | |
| 430 if (!(u_isspace(char_iterator.current()) || | |
| 431 u_charType(char_iterator.current()) == U_CONTROL_CHAR || | |
| 432 u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) { | |
| 433 // Not a whitespace character. Advance the iterator so that we | |
| 434 // include the current character in the truncated string. | |
| 435 char_iterator.next(); | |
| 436 break; | |
| 437 } | |
| 438 } | |
| 439 if (char_iterator.hasPrevious()) { | |
| 440 // Found a valid break point. | |
| 441 index = char_iterator.getIndex(); | |
| 442 } else { | |
| 443 // String has leading whitespace, return the elide string. | |
| 444 return kElideString; | |
| 445 } | |
| 446 } | |
| 447 return string.substr(0, index) + kElideString; | |
| 448 } | |
| 449 | |
| 450 #if defined(WCHAR_T_IS_UTF32) | |
| 451 std::wstring ToLower(const std::wstring& string) { | |
| 452 string16 string_utf16 = WideToUTF16(string); | |
| 453 UnicodeString lower_u_str( | |
| 454 UnicodeString(string_utf16.c_str()).toLower(Locale::getDefault())); | |
| 455 string16 result_utf16; | |
| 456 lower_u_str.extract(0, lower_u_str.length(), | |
| 457 WriteInto(&result_utf16, lower_u_str.length() + 1)); | |
| 458 std::wstring result = UTF16ToWide(result_utf16); | |
| 459 return result; | |
| 460 } | |
| 461 #else | |
| 462 std::wstring ToLower(const std::wstring& string) { | |
| 463 UnicodeString lower_u_str( | |
| 464 UnicodeString(string.c_str()).toLower(Locale::getDefault())); | |
| 465 std::wstring result; | |
| 466 lower_u_str.extract(0, lower_u_str.length(), | |
| 467 WriteInto(&result, lower_u_str.length() + 1)); | |
| 468 return result; | |
| 469 } | |
| 470 #endif // defined(WCHAR_T_IS_UTF32) | |
| 471 | |
| 472 // Returns the text direction for the default ICU locale. It is assumed | |
| 473 // that SetICUDefaultLocale has been called to set the default locale to | |
| 474 // the UI locale of Chrome. | |
| 475 TextDirection GetTextDirection() { | |
| 476 if (g_text_direction == UNKNOWN_DIRECTION) { | |
| 477 const Locale& locale = Locale::getDefault(); | |
| 478 g_text_direction = GetTextDirectionForLocale(locale.getName()); | |
| 479 } | |
| 480 return g_text_direction; | |
| 481 } | |
| 482 | |
| 483 TextDirection GetTextDirectionForLocale(const char* locale_name) { | |
| 484 UScriptCode scripts[10]; // 10 scripts should be enough for any locale. | |
| 485 UErrorCode error = U_ZERO_ERROR; | |
| 486 int n = uscript_getCode(locale_name, scripts, 10, &error); | |
| 487 DCHECK(U_SUCCESS(error) && n > 0); | |
| 488 | |
| 489 // Checking Arabic and Hebrew scripts cover Arabic, Hebrew, Farsi, | |
| 490 // Urdu and Azerbaijani written in Arabic. Syriac script | |
| 491 // (another RTL) is not a living script and we didn't yet localize | |
| 492 // to locales using other living RTL scripts such as Thaana and N'ko. | |
| 493 // TODO(jungshik): Use a new ICU API, uloc_getCharacterOrientation to avoid | |
| 494 // 'hardcoded-comparision' with Arabic and Hebrew scripts once we | |
| 495 // upgrade ICU to 4.0 or later or port it to our copy of ICU. | |
| 496 if (scripts[0] == USCRIPT_ARABIC || scripts[0] == USCRIPT_HEBREW) | |
| 497 return RIGHT_TO_LEFT; | |
| 498 return LEFT_TO_RIGHT; | |
| 499 } | |
| 500 | |
| 501 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { | |
| 502 #if defined(WCHAR_T_IS_UTF32) | |
| 503 string16 text_utf16 = WideToUTF16(text); | |
| 504 const UChar* string = text_utf16.c_str(); | |
| 505 #else | |
| 506 const UChar* string = text.c_str(); | |
| 507 #endif | |
| 508 size_t length = text.length(); | |
| 509 size_t position = 0; | |
| 510 while (position < length) { | |
| 511 UChar32 character; | |
| 512 size_t next_position = position; | |
| 513 U16_NEXT(string, next_position, length, character); | |
| 514 | |
| 515 // Now that we have the character, we use ICU in order to query for the | |
| 516 // appropriate Unicode BiDi character type. | |
| 517 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); | |
| 518 if ((property == U_RIGHT_TO_LEFT) || | |
| 519 (property == U_RIGHT_TO_LEFT_ARABIC) || | |
| 520 (property == U_RIGHT_TO_LEFT_EMBEDDING) || | |
| 521 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { | |
| 522 return RIGHT_TO_LEFT; | |
| 523 } else if ((property == U_LEFT_TO_RIGHT) || | |
| 524 (property == U_LEFT_TO_RIGHT_EMBEDDING) || | |
| 525 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { | |
| 526 return LEFT_TO_RIGHT; | |
| 527 } | |
| 528 | |
| 529 position = next_position; | |
| 530 } | |
| 531 | |
| 532 return LEFT_TO_RIGHT; | |
| 533 } | |
| 534 | |
| 535 bool AdjustStringForLocaleDirection(const std::wstring& text, | |
| 536 std::wstring* localized_text) { | |
| 537 if (GetTextDirection() == LEFT_TO_RIGHT || text.length() == 0) | |
| 538 return false; | |
| 539 | |
| 540 // Marking the string as LTR if the locale is RTL and the string does not | |
| 541 // contain strong RTL characters. Otherwise, mark the string as RTL. | |
| 542 *localized_text = text; | |
| 543 bool has_rtl_chars = StringContainsStrongRTLChars(text); | |
| 544 if (!has_rtl_chars) | |
| 545 WrapStringWithLTRFormatting(localized_text); | |
| 546 else | |
| 547 WrapStringWithRTLFormatting(localized_text); | |
| 548 | |
| 549 return true; | |
| 550 } | |
| 551 | |
| 552 bool StringContainsStrongRTLChars(const std::wstring& text) { | |
| 553 #if defined(WCHAR_T_IS_UTF32) | |
| 554 string16 text_utf16 = WideToUTF16(text); | |
| 555 const UChar* string = text_utf16.c_str(); | |
| 556 #else | |
| 557 const UChar* string = text.c_str(); | |
| 558 #endif | |
| 559 size_t length = text.length(); | |
| 560 size_t position = 0; | |
| 561 while (position < length) { | |
| 562 UChar32 character; | |
| 563 size_t next_position = position; | |
| 564 U16_NEXT(string, next_position, length, character); | |
| 565 | |
| 566 // Now that we have the character, we use ICU in order to query for the | |
| 567 // appropriate Unicode BiDi character type. | |
| 568 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); | |
| 569 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) | |
| 570 return true; | |
| 571 | |
| 572 position = next_position; | |
| 573 } | |
| 574 | |
| 575 return false; | |
| 576 } | |
| 577 | |
| 578 void WrapStringWithLTRFormatting(std::wstring* text) { | |
| 579 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. | |
| 580 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); | |
| 581 | |
| 582 // Inserting a PDF (Pop Directional Formatting) mark as the last character. | |
| 583 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); | |
| 584 } | |
| 585 | |
| 586 void WrapStringWithRTLFormatting(std::wstring* text) { | |
| 587 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. | |
| 588 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); | |
| 589 | |
| 590 // Inserting a PDF (Pop Directional Formatting) mark as the last character. | |
| 591 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); | |
| 592 } | |
| 593 | |
| 594 void WrapPathWithLTRFormatting(const FilePath& path, | |
| 595 string16* rtl_safe_path) { | |
| 596 // Wrap the overall path with LRE-PDF pair which essentialy marks the | |
| 597 // string as a Left-To-Right string. | |
| 598 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. | |
| 599 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); | |
| 600 #if defined(OS_MACOSX) | |
| 601 rtl_safe_path->append(UTF8ToUTF16(path.value())); | |
| 602 #elif defined(OS_WIN) | |
| 603 rtl_safe_path->append(path.value()); | |
| 604 #else // defined(OS_LINUX) | |
| 605 std::wstring wide_path = base::SysNativeMBToWide(path.value()); | |
| 606 rtl_safe_path->append(WideToUTF16(wide_path)); | |
| 607 #endif | |
| 608 // Inserting a PDF (Pop Directional Formatting) mark as the last character. | |
| 609 rtl_safe_path->push_back(kPopDirectionalFormatting); | |
| 610 } | |
| 611 | |
| 612 int DefaultCanvasTextAlignment() { | |
| 613 if (GetTextDirection() == LEFT_TO_RIGHT) { | |
| 614 return ChromeCanvas::TEXT_ALIGN_LEFT; | |
| 615 } else { | |
| 616 return ChromeCanvas::TEXT_ALIGN_RIGHT; | |
| 617 } | |
| 618 } | |
| 619 | |
| 620 | |
| 621 // Compares the character data stored in two different strings by specified | |
| 622 // Collator instance. | |
| 623 UCollationResult CompareStringWithCollator(const Collator* collator, | |
| 624 const std::wstring& lhs, | |
| 625 const std::wstring& rhs) { | |
| 626 DCHECK(collator); | |
| 627 UErrorCode error = U_ZERO_ERROR; | |
| 628 #if defined(WCHAR_T_IS_UTF32) | |
| 629 // Need to convert to UTF-16 to be compatible with UnicodeString's | |
| 630 // constructor. | |
| 631 string16 lhs_utf16 = WideToUTF16(lhs); | |
| 632 string16 rhs_utf16 = WideToUTF16(rhs); | |
| 633 | |
| 634 UCollationResult result = collator->compare( | |
| 635 static_cast<const UChar*>(lhs_utf16.c_str()), | |
| 636 static_cast<int>(lhs_utf16.length()), | |
| 637 static_cast<const UChar*>(rhs_utf16.c_str()), | |
| 638 static_cast<int>(rhs_utf16.length()), | |
| 639 error); | |
| 640 #else | |
| 641 UCollationResult result = collator->compare( | |
| 642 static_cast<const UChar*>(lhs.c_str()), static_cast<int>(lhs.length()), | |
| 643 static_cast<const UChar*>(rhs.c_str()), static_cast<int>(rhs.length()), | |
| 644 error); | |
| 645 #endif | |
| 646 DCHECK(U_SUCCESS(error)); | |
| 647 return result; | |
| 648 } | |
| 649 | |
| 650 // Specialization of operator() method for std::wstring version. | |
| 651 template <> | |
| 652 bool StringComparator<std::wstring>::operator()(const std::wstring& lhs, | |
| 653 const std::wstring& rhs) { | |
| 654 // If we can not get collator instance for specified locale, just do simple | |
| 655 // string compare. | |
| 656 if (!collator_) | |
| 657 return lhs < rhs; | |
| 658 return CompareStringWithCollator(collator_, lhs, rhs) == UCOL_LESS; | |
| 659 }; | |
| 660 | |
| 661 void SortStrings(const std::wstring& locale, | |
| 662 std::vector<std::wstring>* strings) { | |
| 663 SortVectorWithStringKey(locale, strings, false); | |
| 664 } | |
| 665 | |
| 666 const std::vector<std::string>& GetAvailableLocales() { | |
| 667 static std::vector<std::string> locales; | |
| 668 if (locales.empty()) { | |
| 669 int num_locales = uloc_countAvailable(); | |
| 670 for (int i = 0; i < num_locales; ++i) { | |
| 671 std::string locale_name = uloc_getAvailable(i); | |
| 672 // Filter out the names that have aliases. | |
| 673 if (IsDuplicateName(locale_name)) | |
| 674 continue; | |
| 675 if (!IsLocaleSupportedByOS(ASCIIToWide(locale_name))) | |
| 676 continue; | |
| 677 // Normalize underscores to hyphens because that's what our locale files | |
| 678 // use. | |
| 679 std::replace(locale_name.begin(), locale_name.end(), '_', '-'); | |
| 680 | |
| 681 // Map the Chinese locale names over to zh-CN and zh-TW. | |
| 682 if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { | |
| 683 locale_name = "zh-CN"; | |
| 684 } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { | |
| 685 locale_name = "zh-TW"; | |
| 686 } | |
| 687 locales.push_back(locale_name); | |
| 688 } | |
| 689 | |
| 690 // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). | |
| 691 locales.push_back("es-419"); | |
| 692 } | |
| 693 return locales; | |
| 694 } | |
| 695 | |
| 696 BiDiLineIterator::~BiDiLineIterator() { | |
| 697 if (bidi_) { | |
| 698 ubidi_close(bidi_); | |
| 699 bidi_ = NULL; | |
| 700 } | |
| 701 } | |
| 702 | |
| 703 UBool BiDiLineIterator::Open(const std::wstring& text, | |
| 704 bool right_to_left, | |
| 705 bool url) { | |
| 706 DCHECK(bidi_ == NULL); | |
| 707 UErrorCode error = U_ZERO_ERROR; | |
| 708 bidi_ = ubidi_openSized(static_cast<int>(text.length()), 0, &error); | |
| 709 if (U_FAILURE(error)) | |
| 710 return false; | |
| 711 if (right_to_left && url) | |
| 712 ubidi_setReorderingMode(bidi_, UBIDI_REORDER_RUNS_ONLY); | |
| 713 #if defined(WCHAR_T_IS_UTF32) | |
| 714 const string16 text_utf16 = WideToUTF16(text); | |
| 715 #else | |
| 716 const std::wstring &text_utf16 = text; | |
| 717 #endif // U_SIZEOF_WCHAR_T != 4 | |
| 718 ubidi_setPara(bidi_, text_utf16.data(), static_cast<int>(text_utf16.length()), | |
| 719 right_to_left ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, | |
| 720 NULL, &error); | |
| 721 return U_SUCCESS(error); | |
| 722 } | |
| 723 | |
| 724 int BiDiLineIterator::CountRuns() { | |
| 725 DCHECK(bidi_ != NULL); | |
| 726 UErrorCode error = U_ZERO_ERROR; | |
| 727 const int runs = ubidi_countRuns(bidi_, &error); | |
| 728 return U_SUCCESS(error) ? runs : 0; | |
| 729 } | |
| 730 | |
| 731 UBiDiDirection BiDiLineIterator::GetVisualRun(int index, | |
| 732 int* start, | |
| 733 int* length) { | |
| 734 DCHECK(bidi_ != NULL); | |
| 735 return ubidi_getVisualRun(bidi_, index, start, length); | |
| 736 } | |
| 737 | |
| 738 void BiDiLineIterator::GetLogicalRun(int start, | |
| 739 int* end, | |
| 740 UBiDiLevel* level) { | |
| 741 DCHECK(bidi_ != NULL); | |
| 742 ubidi_getLogicalRun(bidi_, start, end, level); | |
| 743 } | |
| 744 | |
| 745 } | |
| OLD | NEW |