| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/i18n/rtl.h" | |
| 6 | |
| 7 #include "base/files/file_path.h" | |
| 8 #include "base/logging.h" | |
| 9 #include "base/strings/string_util.h" | |
| 10 #include "base/strings/sys_string_conversions.h" | |
| 11 #include "base/strings/utf_string_conversions.h" | |
| 12 #include "third_party/icu/source/common/unicode/locid.h" | |
| 13 #include "third_party/icu/source/common/unicode/uchar.h" | |
| 14 #include "third_party/icu/source/common/unicode/uscript.h" | |
| 15 #include "third_party/icu/source/i18n/unicode/coll.h" | |
| 16 | |
| 17 namespace { | |
| 18 | |
| 19 // Extract language, country and variant, but ignore keywords. For example, | |
| 20 // en-US, ca@valencia, ca-ES@valencia. | |
| 21 std::string GetLocaleString(const icu::Locale& locale) { | |
| 22 const char* language = locale.getLanguage(); | |
| 23 const char* country = locale.getCountry(); | |
| 24 const char* variant = locale.getVariant(); | |
| 25 | |
| 26 std::string result = | |
| 27 (language != NULL && *language != '\0') ? language : "und"; | |
| 28 | |
| 29 if (country != NULL && *country != '\0') { | |
| 30 result += '-'; | |
| 31 result += country; | |
| 32 } | |
| 33 | |
| 34 if (variant != NULL && *variant != '\0') { | |
| 35 std::string variant_str(variant); | |
| 36 base::StringToLowerASCII(&variant_str); | |
| 37 result += '@' + variant_str; | |
| 38 } | |
| 39 | |
| 40 return result; | |
| 41 } | |
| 42 | |
| 43 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong | |
| 44 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to | |
| 45 // http://unicode.org/reports/tr9/ for more information. | |
| 46 base::i18n::TextDirection GetCharacterDirection(UChar32 character) { | |
| 47 // Now that we have the character, we use ICU in order to query for the | |
| 48 // appropriate Unicode BiDi character type. | |
| 49 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); | |
| 50 if ((property == U_RIGHT_TO_LEFT) || | |
| 51 (property == U_RIGHT_TO_LEFT_ARABIC) || | |
| 52 (property == U_RIGHT_TO_LEFT_EMBEDDING) || | |
| 53 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { | |
| 54 return base::i18n::RIGHT_TO_LEFT; | |
| 55 } else if ((property == U_LEFT_TO_RIGHT) || | |
| 56 (property == U_LEFT_TO_RIGHT_EMBEDDING) || | |
| 57 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { | |
| 58 return base::i18n::LEFT_TO_RIGHT; | |
| 59 } | |
| 60 return base::i18n::UNKNOWN_DIRECTION; | |
| 61 } | |
| 62 | |
| 63 } // namespace | |
| 64 | |
| 65 namespace base { | |
| 66 namespace i18n { | |
| 67 | |
| 68 // Cached text direction of the ICU default locale. UNKNOWN_DIRECTION means it has not been computed yet: ICUIsRTL() fills it in on demand and SetICUDefaultLocale() resets it. | |
| 69 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; | |
| 70 | |
| 71 // Convert the ICU default locale to a string. | |
| 72 std::string GetConfiguredLocale() { | |
| 73 return GetLocaleString(icu::Locale::getDefault()); | |
| 74 } | |
| 75 | |
| 76 // Convert the ICU canonicalized locale to a string. | |
| 77 std::string GetCanonicalLocale(const std::string& locale) { | |
| 78 return GetLocaleString(icu::Locale::createCanonical(locale.c_str())); | |
| 79 } | |
| 80 | |
| 81 // Convert Chrome locale name to ICU locale name | |
| 82 std::string ICULocaleName(const std::string& locale_string) { | |
| 83 // If not Spanish, just return it. | |
| 84 if (locale_string.substr(0, 2) != "es") | |
| 85 return locale_string; | |
| 86 // Expand es to es-ES. | |
| 87 if (LowerCaseEqualsASCII(locale_string, "es")) | |
| 88 return "es-ES"; | |
| 89 // Map es-419 (Latin American Spanish) to es-FOO depending on the system | |
| 90 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map | |
| 91 // to es-MX (the most populous in Spanish-speaking Latin America). | |
| 92 if (LowerCaseEqualsASCII(locale_string, "es-419")) { | |
| 93 const icu::Locale& locale = icu::Locale::getDefault(); | |
| 94 std::string language = locale.getLanguage(); | |
| 95 const char* country = locale.getCountry(); | |
| 96 if (LowerCaseEqualsASCII(language, "es") && | |
| 97 !LowerCaseEqualsASCII(country, "es")) { | |
| 98 language += '-'; | |
| 99 language += country; | |
| 100 return language; | |
| 101 } | |
| 102 return "es-MX"; | |
| 103 } | |
| 104 // Currently, Chrome has only "es" and "es-419", but later we may have | |
| 105 // more specific "es-RR". | |
| 106 return locale_string; | |
| 107 } | |
| 108 | |
| 109 void SetICUDefaultLocale(const std::string& locale_string) { | |
| 110 icu::Locale locale(ICULocaleName(locale_string).c_str()); | |
| 111 UErrorCode error_code = U_ZERO_ERROR; | |
| 112 icu::Locale::setDefault(locale, error_code); | |
| 113 // This return value is actually bogus because Locale object is | |
| 114 // an ID and setDefault seems to always succeed (regardless of the | |
| 115 // presence of actual locale data). However, | |
| 116 // it does not hurt to have it as a sanity check. | |
| 117 DCHECK(U_SUCCESS(error_code)); | |
| 118 g_icu_text_direction = UNKNOWN_DIRECTION; | |
| 119 } | |
| 120 | |
| 121 bool IsRTL() { | |
| 122 return ICUIsRTL(); | |
| 123 } | |
| 124 | |
| 125 bool ICUIsRTL() { | |
| 126 if (g_icu_text_direction == UNKNOWN_DIRECTION) { | |
| 127 const icu::Locale& locale = icu::Locale::getDefault(); | |
| 128 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); | |
| 129 } | |
| 130 return g_icu_text_direction == RIGHT_TO_LEFT; | |
| 131 } | |
| 132 | |
| 133 TextDirection GetTextDirectionForLocale(const char* locale_name) { | |
| 134 UErrorCode status = U_ZERO_ERROR; | |
| 135 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); | |
| 136 DCHECK(U_SUCCESS(status)); | |
| 137 // Treat anything other than RTL as LTR. | |
| 138 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; | |
| 139 } | |
| 140 | |
| 141 TextDirection GetFirstStrongCharacterDirection(const string16& text) { | |
| 142 const UChar* string = text.c_str(); | |
| 143 size_t length = text.length(); | |
| 144 size_t position = 0; | |
| 145 while (position < length) { | |
| 146 UChar32 character; | |
| 147 size_t next_position = position; | |
| 148 U16_NEXT(string, next_position, length, character); | |
| 149 TextDirection direction = GetCharacterDirection(character); | |
| 150 if (direction != UNKNOWN_DIRECTION) | |
| 151 return direction; | |
| 152 position = next_position; | |
| 153 } | |
| 154 return LEFT_TO_RIGHT; | |
| 155 } | |
| 156 | |
| 157 TextDirection GetLastStrongCharacterDirection(const string16& text) { | |
| 158 const UChar* string = text.c_str(); | |
| 159 size_t position = text.length(); | |
| 160 while (position > 0) { | |
| 161 UChar32 character; | |
| 162 size_t prev_position = position; | |
| 163 U16_PREV(string, 0, prev_position, character); | |
| 164 TextDirection direction = GetCharacterDirection(character); | |
| 165 if (direction != UNKNOWN_DIRECTION) | |
| 166 return direction; | |
| 167 position = prev_position; | |
| 168 } | |
| 169 return LEFT_TO_RIGHT; | |
| 170 } | |
| 171 | |
| 172 TextDirection GetStringDirection(const string16& text) { | |
| 173 const UChar* string = text.c_str(); | |
| 174 size_t length = text.length(); | |
| 175 size_t position = 0; | |
| 176 | |
| 177 TextDirection result(UNKNOWN_DIRECTION); | |
| 178 while (position < length) { | |
| 179 UChar32 character; | |
| 180 size_t next_position = position; | |
| 181 U16_NEXT(string, next_position, length, character); | |
| 182 TextDirection direction = GetCharacterDirection(character); | |
| 183 if (direction != UNKNOWN_DIRECTION) { | |
| 184 if (result != UNKNOWN_DIRECTION && result != direction) | |
| 185 return UNKNOWN_DIRECTION; | |
| 186 result = direction; | |
| 187 } | |
| 188 position = next_position; | |
| 189 } | |
| 190 | |
| 191 // Handle the case of a string not containing any strong directionality | |
| 192 // characters defaulting to LEFT_TO_RIGHT. | |
| 193 if (result == UNKNOWN_DIRECTION) | |
| 194 return LEFT_TO_RIGHT; | |
| 195 | |
| 196 return result; | |
| 197 } | |
| 198 | |
| 199 #if defined(OS_WIN) | |
| 200 bool AdjustStringForLocaleDirection(string16* text) { | |
| 201 if (!IsRTL() || text->empty()) | |
| 202 return false; | |
| 203 | |
| 204 // Marking the string as LTR if the locale is RTL and the string does not | |
| 205 // contain strong RTL characters. Otherwise, mark the string as RTL. | |
| 206 bool has_rtl_chars = StringContainsStrongRTLChars(*text); | |
| 207 if (!has_rtl_chars) | |
| 208 WrapStringWithLTRFormatting(text); | |
| 209 else | |
| 210 WrapStringWithRTLFormatting(text); | |
| 211 | |
| 212 return true; | |
| 213 } | |
| 214 | |
| 215 bool UnadjustStringForLocaleDirection(string16* text) { | |
| 216 if (!IsRTL() || text->empty()) | |
| 217 return false; | |
| 218 | |
| 219 *text = StripWrappingBidiControlCharacters(*text); | |
| 220 return true; | |
| 221 } | |
| 222 #else | |
| 223 bool AdjustStringForLocaleDirection(string16* text) { | |
| 224 // On OS X & GTK the directionality of a label is determined by the first | |
| 225 // strongly directional character. | |
| 226 // However, we want to make sure that in an LTR-language-UI all strings are | |
| 227 // left aligned and vice versa. | |
| 228 // A problem can arise if we display a string which starts with user input. | |
| 229 // User input may be of the opposite directionality to the UI. So the whole | |
| 230 // string will be displayed in the opposite directionality, e.g. if we want to | |
| 231 // display in an LTR UI [such as US English]: | |
| 232 // | |
| 233 // EMAN_NOISNETXE is now installed. | |
| 234 // | |
| 235 // Since EXTENSION_NAME begins with a strong RTL char, the label's | |
| 236 // directionality will be set to RTL and the string will be displayed visually | |
| 237 // as: | |
| 238 // | |
| 239 // .is now installed EMAN_NOISNETXE | |
| 240 // | |
| 241 // In order to solve this issue, we prepend an LRM to the string. An LRM is a | |
| 242 // strongly directional LTR char. | |
| 243 // We also append an LRM at the end, which ensures that we're in an LTR | |
| 244 // context. | |
| 245 | |
| 246 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the | |
| 247 // box so there is no issue with displaying zero-width bidi control characters | |
| 248 // on any system. Thus no need for the !IsRTL() check here. | |
| 249 if (text->empty()) | |
| 250 return false; | |
| 251 | |
| 252 bool ui_direction_is_rtl = IsRTL(); | |
| 253 | |
| 254 bool has_rtl_chars = StringContainsStrongRTLChars(*text); | |
| 255 if (!ui_direction_is_rtl && has_rtl_chars) { | |
| 256 WrapStringWithRTLFormatting(text); | |
| 257 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), | |
| 258 kLeftToRightMark); | |
| 259 text->push_back(kLeftToRightMark); | |
| 260 } else if (ui_direction_is_rtl && has_rtl_chars) { | |
| 261 WrapStringWithRTLFormatting(text); | |
| 262 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), | |
| 263 kRightToLeftMark); | |
| 264 text->push_back(kRightToLeftMark); | |
| 265 } else if (ui_direction_is_rtl) { | |
| 266 WrapStringWithLTRFormatting(text); | |
| 267 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), | |
| 268 kRightToLeftMark); | |
| 269 text->push_back(kRightToLeftMark); | |
| 270 } else { | |
| 271 return false; | |
| 272 } | |
| 273 | |
| 274 return true; | |
| 275 } | |
| 276 | |
| 277 bool UnadjustStringForLocaleDirection(string16* text) { | |
| 278 if (text->empty()) | |
| 279 return false; | |
| 280 | |
| 281 size_t begin_index = 0; | |
| 282 char16 begin = text->at(begin_index); | |
| 283 if (begin == kLeftToRightMark || | |
| 284 begin == kRightToLeftMark) { | |
| 285 ++begin_index; | |
| 286 } | |
| 287 | |
| 288 size_t end_index = text->length() - 1; | |
| 289 char16 end = text->at(end_index); | |
| 290 if (end == kLeftToRightMark || | |
| 291 end == kRightToLeftMark) { | |
| 292 --end_index; | |
| 293 } | |
| 294 | |
| 295 string16 unmarked_text = | |
| 296 text->substr(begin_index, end_index - begin_index + 1); | |
| 297 *text = StripWrappingBidiControlCharacters(unmarked_text); | |
| 298 return true; | |
| 299 } | |
| 300 | |
| 301 #endif // !OS_WIN | |
| 302 | |
| 303 bool StringContainsStrongRTLChars(const string16& text) { | |
| 304 const UChar* string = text.c_str(); | |
| 305 size_t length = text.length(); | |
| 306 size_t position = 0; | |
| 307 while (position < length) { | |
| 308 UChar32 character; | |
| 309 size_t next_position = position; | |
| 310 U16_NEXT(string, next_position, length, character); | |
| 311 | |
| 312 // Now that we have the character, we use ICU in order to query for the | |
| 313 // appropriate Unicode BiDi character type. | |
| 314 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); | |
| 315 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) | |
| 316 return true; | |
| 317 | |
| 318 position = next_position; | |
| 319 } | |
| 320 | |
| 321 return false; | |
| 322 } | |
| 323 | |
| 324 void WrapStringWithLTRFormatting(string16* text) { | |
| 325 if (text->empty()) | |
| 326 return; | |
| 327 | |
| 328 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. | |
| 329 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), | |
| 330 kLeftToRightEmbeddingMark); | |
| 331 | |
| 332 // Inserting a PDF (Pop Directional Formatting) mark as the last character. | |
| 333 text->push_back(kPopDirectionalFormatting); | |
| 334 } | |
| 335 | |
| 336 void WrapStringWithRTLFormatting(string16* text) { | |
| 337 if (text->empty()) | |
| 338 return; | |
| 339 | |
| 340 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. | |
| 341 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), | |
| 342 kRightToLeftEmbeddingMark); | |
| 343 | |
| 344 // Inserting a PDF (Pop Directional Formatting) mark as the last character. | |
| 345 text->push_back(kPopDirectionalFormatting); | |
| 346 } | |
| 347 | |
| 348 void WrapPathWithLTRFormatting(const FilePath& path, | |
| 349 string16* rtl_safe_path) { | |
| 350 // Wrap the overall path with LRE-PDF pair which essentialy marks the | |
| 351 // string as a Left-To-Right string. | |
| 352 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. | |
| 353 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); | |
| 354 #if defined(OS_MACOSX) | |
| 355 rtl_safe_path->append(UTF8ToUTF16(path.value())); | |
| 356 #elif defined(OS_WIN) | |
| 357 rtl_safe_path->append(path.value()); | |
| 358 #else // defined(OS_POSIX) && !defined(OS_MACOSX) | |
| 359 std::wstring wide_path = base::SysNativeMBToWide(path.value()); | |
| 360 rtl_safe_path->append(WideToUTF16(wide_path)); | |
| 361 #endif | |
| 362 // Inserting a PDF (Pop Directional Formatting) mark as the last character. | |
| 363 rtl_safe_path->push_back(kPopDirectionalFormatting); | |
| 364 } | |
| 365 | |
| 366 string16 GetDisplayStringInLTRDirectionality(const string16& text) { | |
| 367 // Always wrap the string in RTL UI (it may be appended to RTL string). | |
| 368 // Also wrap strings with an RTL first strong character direction in LTR UI. | |
| 369 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) { | |
| 370 string16 text_mutable(text); | |
| 371 WrapStringWithLTRFormatting(&text_mutable); | |
| 372 return text_mutable; | |
| 373 } | |
| 374 return text; | |
| 375 } | |
| 376 | |
| 377 string16 StripWrappingBidiControlCharacters(const string16& text) { | |
| 378 if (text.empty()) | |
| 379 return text; | |
| 380 size_t begin_index = 0; | |
| 381 char16 begin = text[begin_index]; | |
| 382 if (begin == kLeftToRightEmbeddingMark || | |
| 383 begin == kRightToLeftEmbeddingMark || | |
| 384 begin == kLeftToRightOverride || | |
| 385 begin == kRightToLeftOverride) | |
| 386 ++begin_index; | |
| 387 size_t end_index = text.length() - 1; | |
| 388 if (text[end_index] == kPopDirectionalFormatting) | |
| 389 --end_index; | |
| 390 return text.substr(begin_index, end_index - begin_index + 1); | |
| 391 } | |
| 392 | |
| 393 } // namespace i18n | |
| 394 } // namespace base | |
| OLD | NEW |