| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/i18n/rtl.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 | |
| 9 #include "base/files/file_path.h" | |
| 10 #include "base/strings/string_util.h" | |
| 11 #include "base/strings/sys_string_conversions.h" | |
| 12 #include "base/strings/utf_string_conversions.h" | |
| 13 #include "base/test/icu_test_util.h" | |
| 14 #include "testing/gtest/include/gtest/gtest.h" | |
| 15 #include "testing/platform_test.h" | |
| 16 #include "third_party/icu/source/i18n/unicode/usearch.h" | |
| 17 | |
| 18 namespace base { | |
| 19 namespace i18n { | |
| 20 | |
| 21 namespace { | |
| 22 | |
| 23 // A test utility function to set the application default text direction. | |
| 24 void SetRTL(bool rtl) { | |
| 25 // Override the current locale/direction. | |
| 26 SetICUDefaultLocale(rtl ? "he" : "en"); | |
| 27 EXPECT_EQ(rtl, IsRTL()); | |
| 28 } | |
| 29 | |
| 30 } // namespace | |
| 31 | |
| 32 class RTLTest : public PlatformTest { | |
| 33 }; | |
| 34 | |
| 35 TEST_F(RTLTest, GetFirstStrongCharacterDirection) { | |
| 36 struct { | |
| 37 const wchar_t* text; | |
| 38 TextDirection direction; | |
| 39 } cases[] = { | |
| 40 // Test pure LTR string. | |
| 41 { L"foo bar", LEFT_TO_RIGHT }, | |
| 42 // Test pure RTL string. | |
| 43 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT}, | |
| 44 // Test bidi string in which the first character with strong directionality | |
| 45 // is a character with type L. | |
| 46 { L"foo \x05d0 bar", LEFT_TO_RIGHT }, | |
| 47 // Test bidi string in which the first character with strong directionality | |
| 48 // is a character with type R. | |
| 49 { L"\x05d0 foo bar", RIGHT_TO_LEFT }, | |
| 50 // Test bidi string which starts with a character with weak directionality | |
| 51 // and in which the first character with strong directionality is a | |
| 52 // character with type L. | |
| 53 { L"!foo \x05d0 bar", LEFT_TO_RIGHT }, | |
| 54 // Test bidi string which starts with a character with weak directionality | |
| 55 // and in which the first character with strong directionality is a | |
| 56 // character with type R. | |
| 57 { L",\x05d0 foo bar", RIGHT_TO_LEFT }, | |
| 58 // Test bidi string in which the first character with strong directionality | |
| 59 // is a character with type LRE. | |
| 60 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT }, | |
| 61 // Test bidi string in which the first character with strong directionality | |
| 62 // is a character with type LRO. | |
| 63 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT }, | |
| 64 // Test bidi string in which the first character with strong directionality | |
| 65 // is a character with type RLE. | |
| 66 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT }, | |
| 67 // Test bidi string in which the first character with strong directionality | |
| 68 // is a character with type RLO. | |
| 69 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT }, | |
| 70 // Test bidi string in which the first character with strong directionality | |
| 71 // is a character with type AL. | |
| 72 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT }, | |
| 73 // Test a string without strong directionality characters. | |
| 74 { L",!.{}", LEFT_TO_RIGHT }, | |
| 75 // Test empty string. | |
| 76 { L"", LEFT_TO_RIGHT }, | |
| 77 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to | |
| 78 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more | |
| 79 // information). | |
| 80 { | |
| 81 #if defined(WCHAR_T_IS_UTF32) | |
| 82 L" ! \x10910" L"abc 123", | |
| 83 #elif defined(WCHAR_T_IS_UTF16) | |
| 84 L" ! \xd802\xdd10" L"abc 123", | |
| 85 #else | |
| 86 #error wchar_t should be either UTF-16 or UTF-32 | |
| 87 #endif | |
| 88 RIGHT_TO_LEFT }, | |
| 89 { | |
| 90 #if defined(WCHAR_T_IS_UTF32) | |
| 91 L" ! \x10401" L"abc 123", | |
| 92 #elif defined(WCHAR_T_IS_UTF16) | |
| 93 L" ! \xd801\xdc01" L"abc 123", | |
| 94 #else | |
| 95 #error wchar_t should be either UTF-16 or UTF-32 | |
| 96 #endif | |
| 97 LEFT_TO_RIGHT }, | |
| 98 }; | |
| 99 | |
| 100 for (size_t i = 0; i < arraysize(cases); ++i) | |
| 101 EXPECT_EQ(cases[i].direction, | |
| 102 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text))); | |
| 103 } | |
| 104 | |
| 105 | |
| 106 // Note that the cases with LRE, LRO, RLE and RLO are invalid for | |
| 107 // GetLastStrongCharacterDirection because they should be followed by PDF | |
| 108 // character. | |
| 109 TEST_F(RTLTest, GetLastStrongCharacterDirection) { | |
| 110 struct { | |
| 111 const wchar_t* text; | |
| 112 TextDirection direction; | |
| 113 } cases[] = { | |
| 114 // Test pure LTR string. | |
| 115 { L"foo bar", LEFT_TO_RIGHT }, | |
| 116 // Test pure RTL string. | |
| 117 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT}, | |
| 118 // Test bidi string in which the last character with strong directionality | |
| 119 // is a character with type L. | |
| 120 { L"foo \x05d0 bar", LEFT_TO_RIGHT }, | |
| 121 // Test bidi string in which the last character with strong directionality | |
| 122 // is a character with type R. | |
| 123 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT }, | |
| 124 // Test bidi string which ends with a character with weak directionality | |
| 125 // and in which the last character with strong directionality is a | |
| 126 // character with type L. | |
| 127 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT }, | |
| 128 // Test bidi string which ends with a character with weak directionality | |
| 129 // and in which the last character with strong directionality is a | |
| 130 // character with type R. | |
| 131 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT }, | |
| 132 // Test bidi string in which the last character with strong directionality | |
| 133 // is a character with type AL. | |
| 134 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT }, | |
| 135 // Test a string without strong directionality characters. | |
| 136 { L",!.{}", LEFT_TO_RIGHT }, | |
| 137 // Test empty string. | |
| 138 { L"", LEFT_TO_RIGHT }, | |
| 139 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to | |
| 140 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more | |
| 141 // information). | |
| 142 { | |
| 143 #if defined(WCHAR_T_IS_UTF32) | |
| 144 L"abc 123" L" ! \x10910 !", | |
| 145 #elif defined(WCHAR_T_IS_UTF16) | |
| 146 L"abc 123" L" ! \xd802\xdd10 !", | |
| 147 #else | |
| 148 #error wchar_t should be either UTF-16 or UTF-32 | |
| 149 #endif | |
| 150 RIGHT_TO_LEFT }, | |
| 151 { | |
| 152 #if defined(WCHAR_T_IS_UTF32) | |
| 153 L"abc 123" L" ! \x10401 !", | |
| 154 #elif defined(WCHAR_T_IS_UTF16) | |
| 155 L"abc 123" L" ! \xd801\xdc01 !", | |
| 156 #else | |
| 157 #error wchar_t should be either UTF-16 or UTF-32 | |
| 158 #endif | |
| 159 LEFT_TO_RIGHT }, | |
| 160 }; | |
| 161 | |
| 162 for (size_t i = 0; i < arraysize(cases); ++i) | |
| 163 EXPECT_EQ(cases[i].direction, | |
| 164 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text))); | |
| 165 } | |
| 166 | |
| 167 TEST_F(RTLTest, GetStringDirection) { | |
| 168 struct { | |
| 169 const wchar_t* text; | |
| 170 TextDirection direction; | |
| 171 } cases[] = { | |
| 172 // Test pure LTR string. | |
| 173 { L"foobar", LEFT_TO_RIGHT }, | |
| 174 { L".foobar", LEFT_TO_RIGHT }, | |
| 175 { L"foo, bar", LEFT_TO_RIGHT }, | |
| 176 // Test pure LTR with strong directionality characters of type LRE. | |
| 177 { L"\x202a\x202a", LEFT_TO_RIGHT }, | |
| 178 { L".\x202a\x202a", LEFT_TO_RIGHT }, | |
| 179 { L"\x202a, \x202a", LEFT_TO_RIGHT }, | |
| 180 // Test pure LTR with strong directionality characters of type LRO. | |
| 181 { L"\x202d\x202d", LEFT_TO_RIGHT }, | |
| 182 { L".\x202d\x202d", LEFT_TO_RIGHT }, | |
| 183 { L"\x202d, \x202d", LEFT_TO_RIGHT }, | |
| 184 // Test pure LTR with various types of strong directionality characters. | |
| 185 { L"foo \x202a\x202d", LEFT_TO_RIGHT }, | |
| 186 { L".\x202d foo \x202a", LEFT_TO_RIGHT }, | |
| 187 { L"\x202a, \x202d foo", LEFT_TO_RIGHT }, | |
| 188 // Test pure RTL with strong directionality characters of type R. | |
| 189 { L"\x05d0\x05d0", RIGHT_TO_LEFT }, | |
| 190 { L".\x05d0\x05d0", RIGHT_TO_LEFT }, | |
| 191 { L"\x05d0, \x05d0", RIGHT_TO_LEFT }, | |
| 192 // Test pure RTL with strong directionality characters of type RLE. | |
| 193 { L"\x202b\x202b", RIGHT_TO_LEFT }, | |
| 194 { L".\x202b\x202b", RIGHT_TO_LEFT }, | |
| 195 { L"\x202b, \x202b", RIGHT_TO_LEFT }, | |
| 196 // Test pure RTL with strong directionality characters of type RLO. | |
| 197 { L"\x202e\x202e", RIGHT_TO_LEFT }, | |
| 198 { L".\x202e\x202e", RIGHT_TO_LEFT }, | |
| 199 { L"\x202e, \x202e", RIGHT_TO_LEFT }, | |
| 200 // Test pure RTL with strong directionality characters of type AL. | |
| 201 { L"\x0622\x0622", RIGHT_TO_LEFT }, | |
| 202 { L".\x0622\x0622", RIGHT_TO_LEFT }, | |
| 203 { L"\x0622, \x0622", RIGHT_TO_LEFT }, | |
| 204 // Test pure RTL with various types of strong directionality characters. | |
| 205 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT }, | |
| 206 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT }, | |
| 207 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT }, | |
| 208 // Test bidi strings. | |
| 209 { L"foo \x05d0 bar", UNKNOWN_DIRECTION }, | |
| 210 { L"\x202b foo bar", UNKNOWN_DIRECTION }, | |
| 211 { L"!foo \x0622 bar", UNKNOWN_DIRECTION }, | |
| 212 { L"\x202a\x202b", UNKNOWN_DIRECTION }, | |
| 213 { L"\x202e\x202d", UNKNOWN_DIRECTION }, | |
| 214 { L"\x0622\x202a", UNKNOWN_DIRECTION }, | |
| 215 { L"\x202d\x05d0", UNKNOWN_DIRECTION }, | |
| 216 // Test a string without strong directionality characters. | |
| 217 { L",!.{}", LEFT_TO_RIGHT }, | |
| 218 // Test empty string. | |
| 219 { L"", LEFT_TO_RIGHT }, | |
| 220 { | |
| 221 #if defined(WCHAR_T_IS_UTF32) | |
| 222 L" ! \x10910" L"abc 123", | |
| 223 #elif defined(WCHAR_T_IS_UTF16) | |
| 224 L" ! \xd802\xdd10" L"abc 123", | |
| 225 #else | |
| 226 #error wchar_t should be either UTF-16 or UTF-32 | |
| 227 #endif | |
| 228 UNKNOWN_DIRECTION }, | |
| 229 { | |
| 230 #if defined(WCHAR_T_IS_UTF32) | |
| 231 L" ! \x10401" L"abc 123", | |
| 232 #elif defined(WCHAR_T_IS_UTF16) | |
| 233 L" ! \xd801\xdc01" L"abc 123", | |
| 234 #else | |
| 235 #error wchar_t should be either UTF-16 or UTF-32 | |
| 236 #endif | |
| 237 LEFT_TO_RIGHT }, | |
| 238 }; | |
| 239 | |
| 240 for (size_t i = 0; i < arraysize(cases); ++i) | |
| 241 EXPECT_EQ(cases[i].direction, | |
| 242 GetStringDirection(WideToUTF16(cases[i].text))); | |
| 243 } | |
| 244 | |
| 245 TEST_F(RTLTest, WrapPathWithLTRFormatting) { | |
| 246 const wchar_t* cases[] = { | |
| 247 L"/foo/bar/test.jpg", | |
| 248 // Test path start with current directory, such as "./foo". | |
| 249 L"./foo", | |
| 250 // Test path start with parent directory, such as "../foo/bar.jpg". | |
| 251 L"../foo/bar.jpg", | |
| 252 // Test absolute path, such as "//foo/bar.jpg". | |
| 253 L"//foo/bar.jpg", | |
| 254 // Test empty path. | |
| 255 L"" | |
| 256 }; | |
| 257 | |
| 258 for (size_t i = 0; i < arraysize(cases); ++i) { | |
| 259 FilePath path; | |
| 260 path = FilePath(base::SysWideToNativeMB(cases[i])); | |
| 261 std::wstring wrapped_expected = | |
| 262 std::wstring(L"\x202a") + cases[i] + L"\x202c"; | |
| 263 string16 localized_file_path_string; | |
| 264 WrapPathWithLTRFormatting(path, &localized_file_path_string); | |
| 265 | |
| 266 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string); | |
| 267 EXPECT_EQ(wrapped_expected, wrapped_actual); | |
| 268 } | |
| 269 } | |
| 270 | |
| 271 TEST_F(RTLTest, WrapString) { | |
| 272 const wchar_t* cases[] = { | |
| 273 L" . ", | |
| 274 L"abc", | |
| 275 L"a" L"\x5d0\x5d1", | |
| 276 L"a" L"\x5d1" L"b", | |
| 277 L"\x5d0\x5d1\x5d2", | |
| 278 L"\x5d0\x5d1" L"a", | |
| 279 L"\x5d0" L"a" L"\x5d1", | |
| 280 }; | |
| 281 | |
| 282 const bool was_rtl = IsRTL(); | |
| 283 | |
| 284 test::ScopedRestoreICUDefaultLocale restore_locale; | |
| 285 for (size_t i = 0; i < 2; ++i) { | |
| 286 // Toggle the application default text direction (to try each direction). | |
| 287 SetRTL(!IsRTL()); | |
| 288 | |
| 289 string16 empty; | |
| 290 WrapStringWithLTRFormatting(&empty); | |
| 291 EXPECT_TRUE(empty.empty()); | |
| 292 WrapStringWithRTLFormatting(&empty); | |
| 293 EXPECT_TRUE(empty.empty()); | |
| 294 | |
| 295 for (size_t i = 0; i < arraysize(cases); ++i) { | |
| 296 string16 input = WideToUTF16(cases[i]); | |
| 297 string16 ltr_wrap = input; | |
| 298 WrapStringWithLTRFormatting(<r_wrap); | |
| 299 EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark); | |
| 300 EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input); | |
| 301 EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting); | |
| 302 | |
| 303 string16 rtl_wrap = input; | |
| 304 WrapStringWithRTLFormatting(&rtl_wrap); | |
| 305 EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark); | |
| 306 EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input); | |
| 307 EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting); | |
| 308 } | |
| 309 } | |
| 310 | |
| 311 EXPECT_EQ(was_rtl, IsRTL()); | |
| 312 } | |
| 313 | |
| 314 TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) { | |
| 315 struct { | |
| 316 const wchar_t* path; | |
| 317 bool wrap_ltr; | |
| 318 bool wrap_rtl; | |
| 319 } cases[] = { | |
| 320 { L"test", false, true }, | |
| 321 { L"test.html", false, true }, | |
| 322 { L"\x05d0\x05d1\x05d2", true, true }, | |
| 323 { L"\x05d0\x05d1\x05d2.txt", true, true }, | |
| 324 { L"\x05d0" L"abc", true, true }, | |
| 325 { L"\x05d0" L"abc.txt", true, true }, | |
| 326 { L"abc\x05d0\x05d1", false, true }, | |
| 327 { L"abc\x05d0\x05d1.jpg", false, true }, | |
| 328 }; | |
| 329 | |
| 330 const bool was_rtl = IsRTL(); | |
| 331 | |
| 332 test::ScopedRestoreICUDefaultLocale restore_locale; | |
| 333 for (size_t i = 0; i < 2; ++i) { | |
| 334 // Toggle the application default text direction (to try each direction). | |
| 335 SetRTL(!IsRTL()); | |
| 336 for (size_t i = 0; i < arraysize(cases); ++i) { | |
| 337 string16 input = WideToUTF16(cases[i].path); | |
| 338 string16 output = GetDisplayStringInLTRDirectionality(input); | |
| 339 // Test the expected wrapping behavior for the current UI directionality. | |
| 340 if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr) | |
| 341 EXPECT_NE(output, input); | |
| 342 else | |
| 343 EXPECT_EQ(output, input); | |
| 344 } | |
| 345 } | |
| 346 | |
| 347 EXPECT_EQ(was_rtl, IsRTL()); | |
| 348 } | |
| 349 | |
| 350 TEST_F(RTLTest, GetTextDirection) { | |
| 351 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar")); | |
| 352 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG")); | |
| 353 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he")); | |
| 354 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL")); | |
| 355 // iw is an obsolete code for Hebrew. | |
| 356 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw")); | |
| 357 // Although we're not yet localized to Farsi and Urdu, we | |
| 358 // do have the text layout direction information for them. | |
| 359 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa")); | |
| 360 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur")); | |
| 361 #if 0 | |
| 362 // Enable these when we include the minimal locale data for Azerbaijani | |
| 363 // written in Arabic and Dhivehi. At the moment, our copy of | |
| 364 // ICU data does not have entries for them. | |
| 365 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab")); | |
| 366 // Dhivehi that uses Thaana script. | |
| 367 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv")); | |
| 368 #endif | |
| 369 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en")); | |
| 370 // Chinese in China with '-'. | |
| 371 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN")); | |
| 372 // Filipino : 3-letter code | |
| 373 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil")); | |
| 374 // Russian | |
| 375 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru")); | |
| 376 // Japanese that uses multiple scripts | |
| 377 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja")); | |
| 378 } | |
| 379 | |
| 380 TEST_F(RTLTest, UnadjustStringForLocaleDirection) { | |
| 381 // These test strings are borrowed from WrapPathWithLTRFormatting | |
| 382 const wchar_t* cases[] = { | |
| 383 L"foo bar", | |
| 384 L"foo \x05d0 bar", | |
| 385 L"\x05d0 foo bar", | |
| 386 L"!foo \x05d0 bar", | |
| 387 L",\x05d0 foo bar", | |
| 388 L"\x202a \x05d0 foo bar", | |
| 389 L"\x202d \x05d0 foo bar", | |
| 390 L"\x202b foo \x05d0 bar", | |
| 391 L"\x202e foo \x05d0 bar", | |
| 392 L"\x0622 foo \x05d0 bar", | |
| 393 }; | |
| 394 | |
| 395 const bool was_rtl = IsRTL(); | |
| 396 | |
| 397 test::ScopedRestoreICUDefaultLocale restore_locale; | |
| 398 for (size_t i = 0; i < 2; ++i) { | |
| 399 // Toggle the application default text direction (to try each direction). | |
| 400 SetRTL(!IsRTL()); | |
| 401 | |
| 402 for (size_t i = 0; i < arraysize(cases); ++i) { | |
| 403 string16 test_case = WideToUTF16(cases[i]); | |
| 404 string16 adjusted_string = test_case; | |
| 405 | |
| 406 if (!AdjustStringForLocaleDirection(&adjusted_string)) | |
| 407 continue; | |
| 408 | |
| 409 EXPECT_NE(test_case, adjusted_string); | |
| 410 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string)); | |
| 411 EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case | |
| 412 << "] with IsRTL() == " << IsRTL(); | |
| 413 } | |
| 414 } | |
| 415 | |
| 416 EXPECT_EQ(was_rtl, IsRTL()); | |
| 417 } | |
| 418 | |
| 419 } // namespace i18n | |
| 420 } // namespace base | |
| OLD | NEW |