| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "ui/base/l10n/l10n_util.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 #include <cstdlib> | |
| 9 #include <iterator> | |
| 10 #include <string> | |
| 11 | |
| 12 #include "base/command_line.h" | |
| 13 #include "base/compiler_specific.h" | |
| 14 #include "base/files/file_util.h" | |
| 15 #include "base/i18n/file_util_icu.h" | |
| 16 #include "base/i18n/rtl.h" | |
| 17 #include "base/i18n/string_compare.h" | |
| 18 #include "base/lazy_instance.h" | |
| 19 #include "base/memory/scoped_ptr.h" | |
| 20 #include "base/path_service.h" | |
| 21 #include "base/strings/string_number_conversions.h" | |
| 22 #include "base/strings/string_split.h" | |
| 23 #include "base/strings/string_util.h" | |
| 24 #include "base/strings/stringprintf.h" | |
| 25 #include "base/strings/sys_string_conversions.h" | |
| 26 #include "base/strings/utf_string_conversions.h" | |
| 27 #include "build/build_config.h" | |
| 28 #include "third_party/icu/source/common/unicode/rbbi.h" | |
| 29 #include "third_party/icu/source/common/unicode/uloc.h" | |
| 30 #include "ui/base/l10n/l10n_util_collator.h" | |
| 31 #include "ui/base/l10n/l10n_util_plurals.h" | |
| 32 #include "ui/base/resource/resource_bundle.h" | |
| 33 #include "ui/base/ui_base_paths.h" | |
| 34 | |
| 35 #if defined(OS_ANDROID) | |
| 36 #include "base/android/locale_utils.h" | |
| 37 #include "ui/base/l10n/l10n_util_android.h" | |
| 38 #endif | |
| 39 | |
| 40 #if defined(USE_GLIB) | |
| 41 #include <glib.h> | |
| 42 #endif | |
| 43 | |
| 44 #if defined(OS_WIN) | |
| 45 #include "ui/base/l10n/l10n_util_win.h" | |
| 46 #endif // OS_WIN | |
| 47 | |
| 48 namespace { | |
| 49 | |
// Language codes that GetAcceptLanguagesForLocale() considers. That function
// walks this table and keeps each code whose name is translated in the
// requested display locale. The table is also exposed through
// GetAcceptLanguageListForTesting() / GetAcceptLanguageListSizeForTesting().
// Entries are listed in alphabetical order of their code.
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "ar",     // Arabic
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",     // Spanish
  "es-419", // Spanish (Latin America)
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hr",     // Croatian
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jw",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (Simplified)
  "zh-TW",  // Chinese (Traditional)
  "zu",     // Zulu
};
| 187 | |
| 188 // Returns true if |locale_name| has an alias in the ICU data file. | |
| 189 bool IsDuplicateName(const std::string& locale_name) { | |
| 190 static const char* const kDuplicateNames[] = { | |
| 191 "en", | |
| 192 "pt", | |
| 193 "zh", | |
| 194 "zh_hans_cn", | |
| 195 "zh_hant_hk", | |
| 196 "zh_hant_mo", | |
| 197 "zh_hans_sg", | |
| 198 "zh_hant_tw" | |
| 199 }; | |
| 200 | |
| 201 // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain). | |
| 202 // 'es-419' (Spanish in Latin America) is not available in ICU so that it | |
| 203 // has to be added manually in GetAvailableLocales(). | |
| 204 if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_")) | |
| 205 return true; | |
| 206 for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) { | |
| 207 if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0) | |
| 208 return true; | |
| 209 } | |
| 210 return false; | |
| 211 } | |
| 212 | |
| 213 // We added 30+ minimally populated locales with only a few entries | |
| 214 // (exemplar character set, script, writing direction and its own | |
| 215 // lanaguage name). These locales have to be distinguished from the | |
| 216 // fully populated locales to which Chrome is localized. | |
| 217 bool IsLocalePartiallyPopulated(const std::string& locale_name) { | |
| 218 // For partially populated locales, even the translation for "English" | |
| 219 // is not available. A more robust/elegant way to check is to add a special | |
| 220 // field (say, 'isPartial' to our version of ICU locale files) and | |
| 221 // check its value, but this hack seems to work well. | |
| 222 return !l10n_util::IsLocaleNameTranslated("en", locale_name); | |
| 223 } | |
| 224 | |
| 225 #if !defined(OS_MACOSX) | |
| 226 bool IsLocaleAvailable(const std::string& locale) { | |
| 227 // If locale has any illegal characters in it, we don't want to try to | |
| 228 // load it because it may be pointing outside the locale data file directory. | |
| 229 if (!base::i18n::IsFilenameLegal(base::ASCIIToUTF16(locale))) | |
| 230 return false; | |
| 231 | |
| 232 // IsLocalePartiallyPopulated() can be called here for an early return w/o | |
| 233 // checking the resource availability below. It'd help when Chrome is run | |
| 234 // under a system locale Chrome is not localized to (e.g.Farsi on Linux), | |
| 235 // but it'd slow down the start up time a little bit for locales Chrome is | |
| 236 // localized to. So, we don't call it here. | |
| 237 if (!l10n_util::IsLocaleSupportedByOS(locale)) | |
| 238 return false; | |
| 239 | |
| 240 // If the ResourceBundle is not yet initialized, return false to avoid the | |
| 241 // CHECK failure in ResourceBundle::GetSharedInstance(). | |
| 242 if (!ResourceBundle::HasSharedInstance()) | |
| 243 return false; | |
| 244 | |
| 245 // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function | |
| 246 // so that this can be invoked without initializing the global instance. | |
| 247 // See crbug.com/230432: CHECK failure in GetUserDataDir(). | |
| 248 return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale); | |
| 249 } | |
| 250 #endif | |
| 251 | |
| 252 // On Linux, the text layout engine Pango determines paragraph directionality | |
| 253 // by looking at the first strongly-directional character in the text. This | |
| 254 // means text such as "Google Chrome foo bar..." will be layed out LTR even | |
| 255 // if "foo bar" is RTL. So this function prepends the necessary RLM in such | |
| 256 // cases. | |
| 257 void AdjustParagraphDirectionality(base::string16* paragraph) { | |
| 258 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID) | |
| 259 if (base::i18n::IsRTL() && | |
| 260 base::i18n::StringContainsStrongRTLChars(*paragraph)) { | |
| 261 paragraph->insert(0, 1, | |
| 262 static_cast<base::char16>(base::i18n::kRightToLeftMark)); | |
| 263 } | |
| 264 #endif | |
| 265 } | |
| 266 | |
| 267 struct AvailableLocalesTraits | |
| 268 : base::DefaultLazyInstanceTraits<std::vector<std::string> > { | |
| 269 static std::vector<std::string>* New(void* instance) { | |
| 270 std::vector<std::string>* locales = | |
| 271 base::DefaultLazyInstanceTraits<std::vector<std::string> >::New( | |
| 272 instance); | |
| 273 int num_locales = uloc_countAvailable(); | |
| 274 for (int i = 0; i < num_locales; ++i) { | |
| 275 std::string locale_name = uloc_getAvailable(i); | |
| 276 // Filter out the names that have aliases. | |
| 277 if (IsDuplicateName(locale_name)) | |
| 278 continue; | |
| 279 // Filter out locales for which we have only partially populated data | |
| 280 // and to which Chrome is not localized. | |
| 281 if (IsLocalePartiallyPopulated(locale_name)) | |
| 282 continue; | |
| 283 if (!l10n_util::IsLocaleSupportedByOS(locale_name)) | |
| 284 continue; | |
| 285 // Normalize underscores to hyphens because that's what our locale files | |
| 286 // use. | |
| 287 std::replace(locale_name.begin(), locale_name.end(), '_', '-'); | |
| 288 | |
| 289 // Map the Chinese locale names over to zh-CN and zh-TW. | |
| 290 if (LowerCaseEqualsASCII(locale_name, "zh-hans")) { | |
| 291 locale_name = "zh-CN"; | |
| 292 } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) { | |
| 293 locale_name = "zh-TW"; | |
| 294 } | |
| 295 locales->push_back(locale_name); | |
| 296 } | |
| 297 | |
| 298 // Manually add 'es-419' to the list. See the comment in IsDuplicateName(). | |
| 299 locales->push_back("es-419"); | |
| 300 return locales; | |
| 301 } | |
| 302 }; | |
| 303 | |
| 304 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits> | |
| 305 g_available_locales = LAZY_INSTANCE_INITIALIZER; | |
| 306 | |
| 307 } // namespace | |
| 308 | |
| 309 namespace l10n_util { | |
| 310 | |
| 311 std::string GetCanonicalLocale(const std::string& locale) { | |
| 312 return base::i18n::GetCanonicalLocale(locale.c_str()); | |
| 313 } | |
| 314 | |
// Returns the part of |locale| that precedes the first '-'. When there is no
// hyphen the whole string is returned.
std::string GetLanguage(const std::string& locale) {
  // find() yields npos when no hyphen exists, and substr(0, npos) is the
  // entire string.
  return locale.substr(0, locale.find('-'));
}
| 319 | |
| 320 bool CheckAndResolveLocale(const std::string& locale, | |
| 321 std::string* resolved_locale) { | |
| 322 #if defined(OS_MACOSX) | |
| 323 NOTIMPLEMENTED(); | |
| 324 return false; | |
| 325 #else | |
| 326 if (IsLocaleAvailable(locale)) { | |
| 327 *resolved_locale = locale; | |
| 328 return true; | |
| 329 } | |
| 330 | |
| 331 // If there's a variant, skip over it so we can try without the region | |
| 332 // code. For example, ca_ES@valencia should cause us to try ca@valencia | |
| 333 // before ca. | |
| 334 std::string::size_type variant_pos = locale.find('@'); | |
| 335 if (variant_pos != std::string::npos) | |
| 336 return false; | |
| 337 | |
| 338 // If the locale matches language but not country, use that instead. | |
| 339 // TODO(jungshik) : Nothing is done about languages that Chrome | |
| 340 // does not support but available on Windows. We fall | |
| 341 // back to en-US in GetApplicationLocale so that it's a not critical, | |
| 342 // but we can do better. | |
| 343 const std::string lang(GetLanguage(locale)); | |
| 344 if (lang.size() < locale.size()) { | |
| 345 std::string region(locale, lang.size() + 1); | |
| 346 std::string tmp_locale(lang); | |
| 347 // Map es-RR other than es-ES to es-419 (Chrome's Latin American | |
| 348 // Spanish locale). | |
| 349 if (LowerCaseEqualsASCII(lang, "es") && | |
| 350 !LowerCaseEqualsASCII(region, "es")) { | |
| 351 tmp_locale.append("-419"); | |
| 352 } else if (LowerCaseEqualsASCII(lang, "zh")) { | |
| 353 // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN. | |
| 354 if (LowerCaseEqualsASCII(region, "hk") || | |
| 355 LowerCaseEqualsASCII(region, "mo")) { // Macao | |
| 356 tmp_locale.append("-TW"); | |
| 357 } else { | |
| 358 tmp_locale.append("-CN"); | |
| 359 } | |
| 360 } else if (LowerCaseEqualsASCII(lang, "en")) { | |
| 361 // Map Australian, Canadian, New Zealand and South African English | |
| 362 // to British English for now. | |
| 363 // TODO(jungshik): en-CA may have to change sides once | |
| 364 // we have OS locale separate from app locale (Chrome's UI language). | |
| 365 if (LowerCaseEqualsASCII(region, "au") || | |
| 366 LowerCaseEqualsASCII(region, "ca") || | |
| 367 LowerCaseEqualsASCII(region, "nz") || | |
| 368 LowerCaseEqualsASCII(region, "za")) { | |
| 369 tmp_locale.append("-GB"); | |
| 370 } else { | |
| 371 tmp_locale.append("-US"); | |
| 372 } | |
| 373 } | |
| 374 if (IsLocaleAvailable(tmp_locale)) { | |
| 375 resolved_locale->swap(tmp_locale); | |
| 376 return true; | |
| 377 } | |
| 378 } | |
| 379 | |
| 380 // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US. | |
| 381 struct { | |
| 382 const char* source; | |
| 383 const char* dest; | |
| 384 } alias_map[] = { | |
| 385 {"no", "nb"}, | |
| 386 {"tl", "fil"}, | |
| 387 {"iw", "he"}, | |
| 388 {"en", "en-US"}, | |
| 389 }; | |
| 390 | |
| 391 for (size_t i = 0; i < arraysize(alias_map); ++i) { | |
| 392 if (LowerCaseEqualsASCII(lang, alias_map[i].source)) { | |
| 393 std::string tmp_locale(alias_map[i].dest); | |
| 394 if (IsLocaleAvailable(tmp_locale)) { | |
| 395 resolved_locale->swap(tmp_locale); | |
| 396 return true; | |
| 397 } | |
| 398 } | |
| 399 } | |
| 400 | |
| 401 return false; | |
| 402 #endif | |
| 403 } | |
| 404 | |
// Picks the application locale without touching ICU's default locale.
//
// On Mac the result is the locale override if one is set, else |pref_locale|,
// else "en-US". On every other platform a platform-specific list of candidate
// locales is built, and the first candidate that CheckAndResolveLocale()
// accepts is returned. If none resolves, "en-US" is returned when it is
// available, otherwise an empty string.
std::string GetApplicationLocaleInternal(const std::string& pref_locale) {
#if defined(OS_MACOSX)

  // Use any override (Cocoa for the browser), otherwise use the preference
  // passed to the function.
  std::string app_locale = l10n_util::GetLocaleOverride();
  if (app_locale.empty())
    app_locale = pref_locale;

  // The above should handle all of the cases Chrome normally hits, but for some
  // unit tests, we need something to fall back to.
  if (app_locale.empty())
    app_locale = "en-US";

  return app_locale;

#else

  std::string resolved_locale;
  // Candidate locales in priority order; filled in per platform below.
  std::vector<std::string> candidates;

  // We only use --lang and the app pref on Windows.  On Linux, we only
  // look at the LC_*/LANG environment variables.  We do, however, pass --lang
  // to renderer and plugin processes so they know what language the parent
  // process decided to use.

#if defined(OS_WIN)

  // First, try the preference value.
  if (!pref_locale.empty())
    candidates.push_back(GetCanonicalLocale(pref_locale));

  // Next, try the overridden locale.
  const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
  if (!languages.empty()) {
    candidates.reserve(candidates.size() + languages.size());
    std::transform(languages.begin(), languages.end(),
                   std::back_inserter(candidates), &GetCanonicalLocale);
  } else {
    // If no override was set, defer to ICU
    candidates.push_back(base::i18n::GetConfiguredLocale());
  }

#elif defined(OS_ANDROID)

  // On Android, query java.util.Locale for the default locale.
  candidates.push_back(base::android::GetDefaultLocale());

#elif defined(USE_GLIB) && !defined(OS_CHROMEOS)

  // GLib implements correct environment variable parsing with
  // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
  // We used to use our custom parsing code along with ICU for this purpose.
  // If we have a port that does not depend on GTK, we have to
  // restore our custom code for that port.
  const char* const* languages = g_get_language_names();
  DCHECK(languages);  // A valid pointer is guaranteed.
  DCHECK(*languages);  // At least one entry, "C", is guaranteed.

  for (; *languages != NULL; ++languages) {
    candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
  }

#else

  // By default, use the application locale preference. This applies to ChromeOS
  // and linux systems without glib.
  if (!pref_locale.empty())
    candidates.push_back(pref_locale);

#endif

  // The first candidate that resolves wins.
  std::vector<std::string>::const_iterator i = candidates.begin();
  for (; i != candidates.end(); ++i) {
    if (CheckAndResolveLocale(*i, &resolved_locale)) {
      return resolved_locale;
    }
  }

  // Fallback on en-US.
  const std::string fallback_locale("en-US");
  if (IsLocaleAvailable(fallback_locale)) {
    return fallback_locale;
  }

  // Nothing usable was found, not even en-US.
  return std::string();

#endif
}
| 494 | |
| 495 std::string GetApplicationLocale(const std::string& pref_locale, | |
| 496 bool set_icu_locale) { | |
| 497 const std::string locale = GetApplicationLocaleInternal(pref_locale); | |
| 498 if (set_icu_locale && !locale.empty()) | |
| 499 base::i18n::SetICUDefaultLocale(locale); | |
| 500 return locale; | |
| 501 } | |
| 502 | |
| 503 std::string GetApplicationLocale(const std::string& pref_locale) { | |
| 504 return GetApplicationLocale(pref_locale, true /* set_icu_locale */); | |
| 505 } | |
| 506 | |
| 507 bool IsLocaleNameTranslated(const char* locale, | |
| 508 const std::string& display_locale) { | |
| 509 base::string16 display_name = | |
| 510 l10n_util::GetDisplayNameForLocale(locale, display_locale, false); | |
| 511 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not | |
| 512 // uloc_getDisplayName returns the actual translation or the default | |
| 513 // value (locale code), we have to rely on this hack to tell whether | |
| 514 // the translation is available or not. If ICU doesn't have a translated | |
| 515 // name for this locale, GetDisplayNameForLocale will just return the | |
| 516 // locale code. | |
| 517 return !base::IsStringASCII(display_name) || | |
| 518 base::UTF16ToASCII(display_name) != locale; | |
| 519 } | |
| 520 | |
| 521 base::string16 GetDisplayNameForLocale(const std::string& locale, | |
| 522 const std::string& display_locale, | |
| 523 bool is_for_ui) { | |
| 524 std::string locale_code = locale; | |
| 525 // Internally, we use the language code of zh-CN and zh-TW, but we want the | |
| 526 // display names to be Chinese (Simplified) and Chinese (Traditional) instead | |
| 527 // of Chinese (China) and Chinese (Taiwan). | |
| 528 // Translate uses "tl" (Tagalog) to mean "fil" (Filipino) until Google | |
| 529 // translate is changed to understand "fil". Make "tl" alias to "fil". | |
| 530 if (locale_code == "zh-CN") | |
| 531 locale_code = "zh-Hans"; | |
| 532 else if (locale_code == "zh-TW") | |
| 533 locale_code = "zh-Hant"; | |
| 534 else if (locale_code == "tl") | |
| 535 locale_code = "fil"; | |
| 536 | |
| 537 base::string16 display_name; | |
| 538 #if defined(OS_ANDROID) | |
| 539 // Use Java API to get locale display name so that we can remove most of | |
| 540 // the lang data from icu data to reduce binary size, except for zh-Hans and | |
| 541 // zh-Hant because the current Android Java API doesn't support scripts. | |
| 542 // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once | |
| 543 // Android Java API supports scripts. | |
| 544 if (!StartsWithASCII(locale_code, "zh-Han", true)) { | |
| 545 display_name = GetDisplayNameForLocale(locale_code, display_locale); | |
| 546 } else | |
| 547 #endif | |
| 548 { | |
| 549 UErrorCode error = U_ZERO_ERROR; | |
| 550 const int kBufferSize = 1024; | |
| 551 | |
| 552 int actual_size = uloc_getDisplayName( | |
| 553 locale_code.c_str(), display_locale.c_str(), | |
| 554 WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error); | |
| 555 DCHECK(U_SUCCESS(error)); | |
| 556 display_name.resize(actual_size); | |
| 557 } | |
| 558 | |
| 559 // Add directional markup so parentheses are properly placed. | |
| 560 if (is_for_ui && base::i18n::IsRTL()) | |
| 561 base::i18n::AdjustStringForLocaleDirection(&display_name); | |
| 562 return display_name; | |
| 563 } | |
| 564 | |
| 565 base::string16 GetDisplayNameForCountry(const std::string& country_code, | |
| 566 const std::string& display_locale) { | |
| 567 return GetDisplayNameForLocale("_" + country_code, display_locale, false); | |
| 568 } | |
| 569 | |
// Returns a copy of |locale| in which every '-' is replaced by '_'.
std::string NormalizeLocale(const std::string& locale) {
  std::string result;
  result.reserve(locale.size());
  for (std::string::const_iterator it = locale.begin(); it != locale.end();
       ++it) {
    result.push_back(*it == '-' ? '_' : *it);
  }
  return result;
}
| 576 | |
| 577 void GetParentLocales(const std::string& current_locale, | |
| 578 std::vector<std::string>* parent_locales) { | |
| 579 std::string locale(NormalizeLocale(current_locale)); | |
| 580 | |
| 581 const int kNameCapacity = 256; | |
| 582 char parent[kNameCapacity]; | |
| 583 base::strlcpy(parent, locale.c_str(), kNameCapacity); | |
| 584 parent_locales->push_back(parent); | |
| 585 UErrorCode err = U_ZERO_ERROR; | |
| 586 while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) { | |
| 587 if (U_FAILURE(err)) | |
| 588 break; | |
| 589 parent_locales->push_back(parent); | |
| 590 } | |
| 591 } | |
| 592 | |
| 593 bool IsValidLocaleSyntax(const std::string& locale) { | |
| 594 // Check that the length is plausible. | |
| 595 if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY) | |
| 596 return false; | |
| 597 | |
| 598 // Strip off the part after an '@' sign, which might contain keywords, | |
| 599 // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil. | |
| 600 // We don't validate that part much, just check that there's at least one | |
| 601 // equals sign in a plausible place. Normalize the prefix so that hyphens | |
| 602 // are changed to underscores. | |
| 603 std::string prefix = NormalizeLocale(locale); | |
| 604 size_t split_point = locale.find("@"); | |
| 605 if (split_point != std::string::npos) { | |
| 606 std::string keywords = locale.substr(split_point + 1); | |
| 607 prefix = locale.substr(0, split_point); | |
| 608 | |
| 609 size_t equals_loc = keywords.find("="); | |
| 610 if (equals_loc == std::string::npos || | |
| 611 equals_loc < 1 || equals_loc > keywords.size() - 2) | |
| 612 return false; | |
| 613 } | |
| 614 | |
| 615 // Check that all characters before the at-sign are alphanumeric or | |
| 616 // underscore. | |
| 617 for (size_t i = 0; i < prefix.size(); i++) { | |
| 618 char ch = prefix[i]; | |
| 619 if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_') | |
| 620 return false; | |
| 621 } | |
| 622 | |
| 623 // Check that the initial token (before the first hyphen/underscore) | |
| 624 // is 1 - 3 alphabetical characters (a language tag). | |
| 625 for (size_t i = 0; i < prefix.size(); i++) { | |
| 626 char ch = prefix[i]; | |
| 627 if (ch == '_') { | |
| 628 if (i < 1 || i > 3) | |
| 629 return false; | |
| 630 break; | |
| 631 } | |
| 632 if (!IsAsciiAlpha(ch)) | |
| 633 return false; | |
| 634 } | |
| 635 | |
| 636 // Check that the all tokens after the initial token are 1 - 8 characters. | |
| 637 // (Tokenize/StringTokenizer don't work here, they collapse multiple | |
| 638 // delimiters into one.) | |
| 639 int token_len = 0; | |
| 640 int token_index = 0; | |
| 641 for (size_t i = 0; i < prefix.size(); i++) { | |
| 642 if (prefix[i] != '_') { | |
| 643 token_len++; | |
| 644 continue; | |
| 645 } | |
| 646 | |
| 647 if (token_index > 0 && (token_len < 1 || token_len > 8)) { | |
| 648 return false; | |
| 649 } | |
| 650 token_index++; | |
| 651 token_len = 0; | |
| 652 } | |
| 653 if (token_index == 0 && (token_len < 1 || token_len > 3)) { | |
| 654 return false; | |
| 655 } else if (token_len < 1 || token_len > 8) { | |
| 656 return false; | |
| 657 } | |
| 658 | |
| 659 return true; | |
| 660 } | |
| 661 | |
| 662 std::string GetStringUTF8(int message_id) { | |
| 663 return base::UTF16ToUTF8(GetStringUTF16(message_id)); | |
| 664 } | |
| 665 | |
| 666 base::string16 GetStringUTF16(int message_id) { | |
| 667 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); | |
| 668 base::string16 str = rb.GetLocalizedString(message_id); | |
| 669 AdjustParagraphDirectionality(&str); | |
| 670 | |
| 671 return str; | |
| 672 } | |
| 673 | |
| 674 base::string16 GetStringFUTF16(int message_id, | |
| 675 const std::vector<base::string16>& replacements, | |
| 676 std::vector<size_t>* offsets) { | |
| 677 // TODO(tc): We could save a string copy if we got the raw string as | |
| 678 // a StringPiece and were able to call ReplaceStringPlaceholders with | |
| 679 // a StringPiece format string and base::string16 substitution strings. In | |
| 680 // practice, the strings should be relatively short. | |
| 681 ResourceBundle& rb = ResourceBundle::GetSharedInstance(); | |
| 682 const base::string16& format_string = rb.GetLocalizedString(message_id); | |
| 683 | |
| 684 #ifndef NDEBUG | |
| 685 // Make sure every replacement string is being used, so we don't just | |
| 686 // silently fail to insert one. If |offsets| is non-NULL, then don't do this | |
| 687 // check as the code may simply want to find the placeholders rather than | |
| 688 // actually replacing them. | |
| 689 if (!offsets) { | |
| 690 std::string utf8_string = base::UTF16ToUTF8(format_string); | |
| 691 | |
| 692 // $9 is the highest allowed placeholder. | |
| 693 for (size_t i = 0; i < 9; ++i) { | |
| 694 bool placeholder_should_exist = replacements.size() > i; | |
| 695 | |
| 696 std::string placeholder = | |
| 697 base::StringPrintf("$%d", static_cast<int>(i + 1)); | |
| 698 size_t pos = utf8_string.find(placeholder.c_str()); | |
| 699 if (placeholder_should_exist) { | |
| 700 DCHECK_NE(std::string::npos, pos) << | |
| 701 " Didn't find a " << placeholder << " placeholder in " << | |
| 702 utf8_string; | |
| 703 } else { | |
| 704 DCHECK_EQ(std::string::npos, pos) << | |
| 705 " Unexpectedly found a " << placeholder << " placeholder in " << | |
| 706 utf8_string; | |
| 707 } | |
| 708 } | |
| 709 } | |
| 710 #endif | |
| 711 | |
| 712 base::string16 formatted = ReplaceStringPlaceholders( | |
| 713 format_string, replacements, offsets); | |
| 714 AdjustParagraphDirectionality(&formatted); | |
| 715 | |
| 716 return formatted; | |
| 717 } | |
| 718 | |
| 719 std::string GetStringFUTF8(int message_id, | |
| 720 const base::string16& a) { | |
| 721 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a)); | |
| 722 } | |
| 723 | |
| 724 std::string GetStringFUTF8(int message_id, | |
| 725 const base::string16& a, | |
| 726 const base::string16& b) { | |
| 727 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b)); | |
| 728 } | |
| 729 | |
| 730 std::string GetStringFUTF8(int message_id, | |
| 731 const base::string16& a, | |
| 732 const base::string16& b, | |
| 733 const base::string16& c) { | |
| 734 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c)); | |
| 735 } | |
| 736 | |
| 737 std::string GetStringFUTF8(int message_id, | |
| 738 const base::string16& a, | |
| 739 const base::string16& b, | |
| 740 const base::string16& c, | |
| 741 const base::string16& d) { | |
| 742 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d)); | |
| 743 } | |
| 744 | |
| 745 base::string16 GetStringFUTF16(int message_id, | |
| 746 const base::string16& a) { | |
| 747 std::vector<base::string16> replacements; | |
| 748 replacements.push_back(a); | |
| 749 return GetStringFUTF16(message_id, replacements, NULL); | |
| 750 } | |
| 751 | |
| 752 base::string16 GetStringFUTF16(int message_id, | |
| 753 const base::string16& a, | |
| 754 const base::string16& b) { | |
| 755 return GetStringFUTF16(message_id, a, b, NULL); | |
| 756 } | |
| 757 | |
| 758 base::string16 GetStringFUTF16(int message_id, | |
| 759 const base::string16& a, | |
| 760 const base::string16& b, | |
| 761 const base::string16& c) { | |
| 762 std::vector<base::string16> replacements; | |
| 763 replacements.push_back(a); | |
| 764 replacements.push_back(b); | |
| 765 replacements.push_back(c); | |
| 766 return GetStringFUTF16(message_id, replacements, NULL); | |
| 767 } | |
| 768 | |
| 769 base::string16 GetStringFUTF16(int message_id, | |
| 770 const base::string16& a, | |
| 771 const base::string16& b, | |
| 772 const base::string16& c, | |
| 773 const base::string16& d) { | |
| 774 std::vector<base::string16> replacements; | |
| 775 replacements.push_back(a); | |
| 776 replacements.push_back(b); | |
| 777 replacements.push_back(c); | |
| 778 replacements.push_back(d); | |
| 779 return GetStringFUTF16(message_id, replacements, NULL); | |
| 780 } | |
| 781 | |
| 782 base::string16 GetStringFUTF16(int message_id, | |
| 783 const base::string16& a, | |
| 784 const base::string16& b, | |
| 785 const base::string16& c, | |
| 786 const base::string16& d, | |
| 787 const base::string16& e) { | |
| 788 std::vector<base::string16> replacements; | |
| 789 replacements.push_back(a); | |
| 790 replacements.push_back(b); | |
| 791 replacements.push_back(c); | |
| 792 replacements.push_back(d); | |
| 793 replacements.push_back(e); | |
| 794 return GetStringFUTF16(message_id, replacements, NULL); | |
| 795 } | |
| 796 | |
| 797 base::string16 GetStringFUTF16(int message_id, | |
| 798 const base::string16& a, | |
| 799 size_t* offset) { | |
| 800 DCHECK(offset); | |
| 801 std::vector<size_t> offsets; | |
| 802 std::vector<base::string16> replacements; | |
| 803 replacements.push_back(a); | |
| 804 base::string16 result = GetStringFUTF16(message_id, replacements, &offsets); | |
| 805 DCHECK(offsets.size() == 1); | |
| 806 *offset = offsets[0]; | |
| 807 return result; | |
| 808 } | |
| 809 | |
| 810 base::string16 GetStringFUTF16(int message_id, | |
| 811 const base::string16& a, | |
| 812 const base::string16& b, | |
| 813 std::vector<size_t>* offsets) { | |
| 814 std::vector<base::string16> replacements; | |
| 815 replacements.push_back(a); | |
| 816 replacements.push_back(b); | |
| 817 return GetStringFUTF16(message_id, replacements, offsets); | |
| 818 } | |
| 819 | |
| 820 base::string16 GetStringFUTF16Int(int message_id, int a) { | |
| 821 return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a))); | |
| 822 } | |
| 823 | |
| 824 base::string16 GetStringFUTF16Int(int message_id, int64 a) { | |
| 825 return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a))); | |
| 826 } | |
| 827 | |
| 828 base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids, | |
| 829 int number) { | |
| 830 scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids); | |
| 831 DCHECK(format); | |
| 832 | |
| 833 UErrorCode err = U_ZERO_ERROR; | |
| 834 icu::UnicodeString result_files_string = format->format(number, err); | |
| 835 int capacity = result_files_string.length() + 1; | |
| 836 DCHECK_GT(capacity, 1); | |
| 837 base::string16 result; | |
| 838 result_files_string.extract( | |
| 839 static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err); | |
| 840 DCHECK(U_SUCCESS(err)); | |
| 841 return result; | |
| 842 } | |
| 843 | |
| 844 std::string GetPluralStringFUTF8(const std::vector<int>& message_ids, | |
| 845 int number) { | |
| 846 return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number)); | |
| 847 } | |
| 848 | |
| 849 void SortStrings16(const std::string& locale, | |
| 850 std::vector<base::string16>* strings) { | |
| 851 SortVectorWithStringKey(locale, strings, false); | |
| 852 } | |
| 853 | |
| 854 const std::vector<std::string>& GetAvailableLocales() { | |
| 855 return g_available_locales.Get(); | |
| 856 } | |
| 857 | |
| 858 void GetAcceptLanguagesForLocale(const std::string& display_locale, | |
| 859 std::vector<std::string>* locale_codes) { | |
| 860 for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) { | |
| 861 if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i], | |
| 862 display_locale)) | |
| 863 // TODO(jungshik) : Put them at the of the list with language codes | |
| 864 // enclosed by brackets instead of skipping. | |
| 865 continue; | |
| 866 locale_codes->push_back(kAcceptLanguageList[i]); | |
| 867 } | |
| 868 } | |
| 869 | |
| 870 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) { | |
| 871 int width = 0; | |
| 872 base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width); | |
| 873 DCHECK_GT(width, 0); | |
| 874 return width; | |
| 875 } | |
| 876 | |
| 877 const char* const* GetAcceptLanguageListForTesting() { | |
| 878 return kAcceptLanguageList; | |
| 879 } | |
| 880 | |
| 881 size_t GetAcceptLanguageListSizeForTesting() { | |
| 882 return arraysize(kAcceptLanguageList); | |
| 883 } | |
| 884 | |
| 885 } // namespace l10n_util | |
| OLD | NEW |