OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2012-2014 International Business Machines |
| 2 # Corporation and others. All Rights Reserved. |
| 3 # |
| 4 # This file should be in UTF-8 with a signature byte sequence ("BOM"). |
| 5 # |
| 6 # collationtest.txt: Collation test data. |
| 7 # |
| 8 # created on: 2012apr13 |
| 9 # created by: Markus W. Scherer |
| 10 |
| 11 # A line with "** test: description" is used for verbose and error output. |
| 12 |
| 13 # A collator can be set with "@ root" or "@ locale language-tag", |
| 14 # for example "@ locale de-u-co-phonebk". |
| 15 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook". |
| 16 |
| 17 # A collator can be built with "@ rules". |
| 18 # An "@ rules" line is followed by one or more lines with the tailoring rules. |
| 19 |
| 20 # A collator can be modified with "% attribute=value". |
| 21 |
| 22 # "* compare" tests the order (= or <) of the following strings. |
| 23 # The relation can be "=" or "<" (the level of the difference is not specified) |
| 24 # or "<1", "<2", "<c", "<3", "<4" (indicating the level of the difference). |
| 25 |
| 26 # Test sections ("* compare") are terminated by |
| 27 # definitions of new collators, changing attributes, or new test sections. |
| 28 |
| 29 ** test: simple CEs & expansions |
| 30 # Many types of mappings are tested elsewhere, including via the UCA conformance tests. |
| 31 # Here we mostly cover a few unusual mappings. |
| 32 @ rules |
| 33 &\x01 # most control codes are ignorable |
| 34 <<<\u0300 # tertiary CE |
| 35 &9<\x00 # NUL not ignorable |
| 36 &\uA00A\uA00B=\uA002 # two long-primary CEs |
| 37 &\uA00A\uA00B\u00050005=\uA003 # three CEs, require 64 bits |
| 38 |
| 39 * compare |
| 40 = \x01 |
| 41 = \x02 |
| 42 <3 \u0300 |
| 43 <1 9 |
| 44 <1 \x00 |
| 45 = \x01\x00\x02 |
| 46 <1 a |
| 47 <3 a\u0300 |
| 48 <2 a\u0308 |
| 49 = ä |
| 50 <1 b |
| 51 <1 か # Hiragana Ka (U+304B) |
| 52 <2 か\u3099 # plus voiced sound mark |
| 53 = が # Hiragana Ga (U+304C) |
| 54 <1 \uA00A\uA00B |
| 55 = \uA002 |
| 56 <1 \uA00A\uA00B\u00050004 |
| 57 <1 \uA00A\uA00B\u00050005 |
| 58 = \uA003 |
| 59 <1 \uA00A\uA00B\u00050006 |
| 60 |
| 61 ** test: contractions |
| 62 # Create some interesting mappings, and map some normalization-inert characters |
| 63 # (which are not subject to canonical reordering) |
| 64 # to some of the same CEs to check the sequence of CEs. |
| 65 @ rules |
| 66 |
| 67 # Contractions starting with 'a' should not continue with any character < U+0300 |
| 68 # so that we can test a shortcut for that. |
| 69 &a=ⓐ |
| 70 &b<bz=ⓑ |
| 71 &d<dz\u0301=ⓓ # d+z+acute |
| 72 &z |
| 73 <a\u0301=Ⓐ # a+acute sorts after z |
| 74 <a\u0301\u0301=Ⓑ # a+acute+acute |
| 75 <a\u0301\u0301\u0358=Ⓒ # a+acute+acute+dot above right |
| 76 <a\u030a=Ⓓ # a+ring |
| 77 <a\u0323=Ⓔ # a+dot below |
| 78 <a\u0323\u0358=Ⓕ # a+dot below+dot above right |
| 79 <a\u0327\u0323\u030a=Ⓖ # a+cedilla+dot below+ring |
| 80 <a\u0327\u0323bz=Ⓗ # a+cedilla+dot below+b+z |
| 81 |
| 82 &\U0001D158=⁰ # musical notehead black (has a symbol primary) |
| 83 <\U0001D158\U0001D165=¼ # musical quarter note |
| 84 |
| 85 # deliberately missing prefix contractions: |
| 86 # dz |
| 87 # a\u0327 |
| 88 # a\u0327\u0323 |
| 89 # a\u0327\u0323b |
| 90 |
| 91 &\x01 |
| 92 <<<\U0001D165=¹ # musical stem (ccc=216) |
| 93 <<<\U0001D16D=² # musical augmentation dot (ccc=226) |
| 94 <<<\U0001D165\U0001D16D=³ # stem+dot (ccc=216 226) |
| 95 &\u0301=❶ # acute (ccc=230) |
| 96 &\u030a=❷ # ring (ccc=230) |
| 97 &\u0308=❸ # diaeresis (ccc=230) |
| 98 <<\u0308\u0301=❹ # diaeresis+acute (=dialytika tonos) (ccc=230 230) |
| 99 &\u0327=❺ # cedilla (ccc=202) |
| 100 &\u0323=❻ # dot below (ccc=220) |
| 101 &\u0331=❼ # macron below (ccc=220) |
| 102 <<\u0331\u0358=❽ # macron below+dot above right (ccc=220 232) |
| 103 &\u0334=❾ # tilde overlay (ccc=1) |
| 104 &\u0358=❿ # dot above right (ccc=232) |
| 105 |
| 106 &\u0f71=① # tibetan vowel sign aa |
| 107 &\u0f72=② # tibetan vowel sign i |
| 108 # \u0f71\u0f72 # tibetan vowel sign aa + i = ii = U+0F73 |
| 109 &\u0f73=③ # tibetan vowel sign ii (ccc=0 but lccc=129) |
| 110 |
| 111 ** test: simple contractions |
| 112 |
| 113 # Some strings are chosen to cause incremental contiguous contraction matching to |
| 114 # go into partial matches for prefixes of contractions |
| 115 # (where the prefixes are deliberately not also contractions). |
| 116 # When there is no complete match, then the matching code must back out of those |
| 117 # so that discontiguous contractions work as specified. |
| 118 |
| 119 * compare |
| 120 # contraction starter with no following text, or mismatch, or blocked |
| 121 <1 a |
| 122 = ⓐ |
| 123 <1 aa |
| 124 = ⓐⓐ |
| 125 <1 ab |
| 126 = ⓐb |
| 127 <1 az |
| 128 = ⓐz |
| 129 |
| 130 * compare |
| 131 <1 a |
| 132 <2 a\u0308\u030a # ring blocked by diaeresis |
| 133 = ⓐ❸❷ |
| 134 <2 a\u0327 |
| 135 = ⓐ❺ |
| 136 |
| 137 * compare |
| 138 <2 \u0308 |
| 139 = ❸ |
| 140 <2 \u0308\u030a\u0301 # acute blocked by ring |
| 141 = ❸❷❶ |
| 142 |
| 143 * compare |
| 144 <1 \U0001D158 |
| 145 = ⁰ |
| 146 <1 \U0001D158\U0001D165 |
| 147 = ¼ |
| 148 |
| 149 # no discontiguous contraction because of missing prefix contraction d+z, |
| 150 # and a starter ('z') after the 'd' |
| 151 * compare |
| 152 <1 dz\u0323\u0301 |
| 153 = dz❻❶ |
| 154 |
| 155 # contiguous contractions |
| 156 * compare |
| 157 <1 abz |
| 158 = ⓐⓑ |
| 159 <1 abzz |
| 160 = ⓐⓑz |
| 161 |
| 162 * compare |
| 163 <1 a |
| 164 <1 z |
| 165 <1 a\u0301 |
| 166 = Ⓐ |
| 167 <1 a\u0301\u0301 |
| 168 = Ⓑ |
| 169 <1 a\u0301\u0301\u0358 |
| 170 = Ⓒ |
| 171 <1 a\u030a |
| 172 = Ⓓ |
| 173 <1 a\u0323\u0358 |
| 174 = Ⓕ |
| 175 <1 a\u0327\u0323\u030a # match despite missing prefix |
| 176 = Ⓖ |
| 177 <1 a\u0327\u0323bz |
| 178 = Ⓗ |
| 179 |
| 180 * compare |
| 181 <2 \u0308\u0308\u0301 # acute blocked from first diaeresis, contracts with second |
| 182 = ❸❹ |
| 183 |
| 184 * compare |
| 185 <1 \U0001D158\U0001D165 |
| 186 = ¼ |
| 187 |
| 188 * compare |
| 189 <3 \U0001D165\U0001D16D |
| 190 = ³ |
| 191 |
| 192 ** test: discontiguous contractions |
| 193 * compare |
| 194 <1 a\u0327\u030a # a+ring skips cedilla |
| 195 = Ⓓ❺ |
| 196 <2 a\u0327\u0327\u030a # a+ring skips 2 cedillas |
| 197 = Ⓓ❺❺ |
| 198 <2 a\u0327\u0327\u0327\u030a # a+ring skips 3 cedillas |
| 199 = Ⓓ❺❺❺ |
| 200 <2 a\u0334\u0327\u0327\u030a # a+ring skips tilde overlay & 2 cedillas |
| 201 = Ⓓ❾❺❺ |
| 202 <1 a\u0327\u0323 # a+dot below skips cedilla |
| 203 = Ⓔ❺ |
| 204 <1 a\u0323\u0301\u0358 # a+dot below+dot ab.r.: 2-char match, then skips acute |
| 205 = Ⓕ❶ |
| 206 <2 a\u0334\u0323\u0358 # a+dot below skips tilde overlay |
| 207 = Ⓕ❾ |
| 208 |
| 209 * compare |
| 210 <2 \u0331\u0331\u0358 # macron below+dot ab.r. skips the second macron
below |
| 211 = ❽❼ |
| 212 |
| 213 * compare |
| 214 <1 a\u0327\u0331\u0323\u030a # a+ring skips cedilla, macron below, dot below
(dot blocked by macron) |
| 215 = Ⓓ❺❼❻ |
| 216 <1 a\u0327\u0323\U0001D16D\u030a # a+dot below skips cedilla |
| 217 = Ⓔ❺²❷ |
| 218 <2 a\u0327\u0327\u0323\u030a # a+dot below skips 2 cedillas |
| 219 = Ⓔ❺❺❷ |
| 220 <2 a\u0327\u0323\u0323\u030a # a+dot below skips cedilla |
| 221 = Ⓔ❺❻❷ |
| 222 <2 a\u0334\u0327\u0323\u030a # a+dot below skips tilde overlay & cedilla |
| 223 = Ⓔ❾❺❷ |
| 224 |
| 225 * compare |
| 226 <1 \U0001D158\u0327\U0001D165 # quarter note skips cedilla |
| 227 = ¼❺ |
| 228 <1 a\U0001D165\u0323 # a+dot below skips stem |
| 229 = Ⓔ¹ |
| 230 |
| 231 # partial contiguous match, backs up, matches discontiguous contraction |
| 232 <1 a\u0327\u0323b |
| 233 = Ⓔ❺b |
| 234 <1 a\u0327\u0323ba |
| 235 = Ⓔ❺bⓐ |
| 236 |
| 237 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks |
| 238 * compare |
| 239 <1 a\u0327\u0301\u0301\u0358 |
| 240 = Ⓒ❺ |
| 241 |
| 242 # FCD but not NFD |
| 243 * compare |
| 244 <1 a\u0f73\u0301 # a+acute skips tibetan ii |
| 245 = Ⓐ③ |
| 246 |
| 247 # FCD but the 0f71 inside the 0f73 must be skipped |
| 248 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73 |
| 249 * compare |
| 250 <1 \u0f71\u0f73 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72 |
| 251 = ③① |
| 252 |
| 253 ** test: discontiguous contractions with nested contractions |
| 254 * compare |
| 255 <1 a\u0323\u0308\u0301\u0358 |
| 256 = Ⓕ❹ |
| 257 <2 a\u0323\u0308\u0301\u0308\u0301\u0358 |
| 258 = Ⓕ❹❹ |
| 259 |
| 260 ** test: discontiguous contractions with interleaved contractions |
| 261 * compare |
| 262 # a+ring & cedilla & macron below+dot above right |
| 263 <1 a\u0327\u0331\u030a\u0358 |
| 264 = Ⓓ❺❽ |
| 265 |
| 266 # a+ring & 1x..3x macron below+dot above right |
| 267 <2 a\u0331\u030a\u0358 |
| 268 = Ⓓ❽ |
| 269 <2 a\u0331\u0331\u030a\u0358\u0358 |
| 270 = Ⓓ❽❽ |
| 271 # also skips acute |
| 272 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358 |
| 273 = Ⓓ❽❽❽❶ |
| 274 |
| 275 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute |
| 276 <1 a\U0001D165\u0323\U0001D16Ddz\u0301 |
| 277 = Ⓔ³ⓓ |
| 278 |
| 279 ** test: some simple string comparisons |
| 280 @ root |
| 281 * compare |
| 282 # first string compares against "" |
| 283 = \u0000 |
| 284 < a |
| 285 <1 b |
| 286 <3 B |
| 287 = \u0000B\u0000 |
| 288 |
| 289 ** test: compare with strength=primary |
| 290 % strength=primary |
| 291 * compare |
| 292 <1 a |
| 293 <1 b |
| 294 = B |
| 295 |
| 296 ** test: compare with strength=secondary |
| 297 % strength=secondary |
| 298 * compare |
| 299 <1 a |
| 300 <1 b |
| 301 = B |
| 302 |
| 303 ** test: compare with strength=tertiary |
| 304 % strength=tertiary |
| 305 * compare |
| 306 <1 a |
| 307 <1 b |
| 308 <3 B |
| 309 |
| 310 ** test: compare with strength=quaternary |
| 311 % strength=quaternary |
| 312 * compare |
| 313 <1 a |
| 314 <1 b |
| 315 <3 B |
| 316 |
| 317 ** test: compare with strength=identical |
| 318 % strength=identical |
| 319 * compare |
| 320 <1 a |
| 321 <1 b |
| 322 <3 B |
| 323 |
| 324 ** test: côté with forwards secondary |
| 325 @ root |
| 326 * compare |
| 327 <1 cote |
| 328 <2 coté |
| 329 <2 côte |
| 330 <2 côté |
| 331 |
| 332 ** test: côté with forwards secondary vs. U+FFFE merge separator |
| 333 # Merged sort keys: On each level, any difference in the first segment |
| 334 # must trump any further difference. |
| 335 * compare |
| 336 <1 cote\uFFFEcôté |
| 337 <2 coté\uFFFEcôte |
| 338 <2 côte\uFFFEcoté |
| 339 <2 côté\uFFFEcote |
| 340 |
| 341 ** test: côté with backwards secondary |
| 342 % backwards=on |
| 343 * compare |
| 344 <1 cote |
| 345 <2 côte |
| 346 <2 coté |
| 347 <2 côté |
| 348 |
| 349 ** test: côté with backwards secondary vs. U+FFFE merge separator |
| 350 # Merged sort keys: On each level, any difference in the first segment |
| 351 # must trump any further difference. |
| 352 * compare |
| 353 <1 cote\uFFFEcôté |
| 354 <2 côte\uFFFEcoté |
| 355 <2 coté\uFFFEcôte |
| 356 <2 côté\uFFFEcote |
| 357 |
| 358 ** test: U+FFFE on identical level |
| 359 @ root |
| 360 % strength=identical |
| 361 * compare |
| 362 # All of these control codes are completely-ignorable, so that |
| 363 # their low code points are compared with the merge separator. |
| 364 # The merge separator must compare less than any other character. |
| 365 <1 \uFFFE\u0001\u0002\u0003 |
| 366 <i \u0001\uFFFE\u0002\u0003 |
| 367 <i \u0001\u0002\uFFFE\u0003 |
| 368 <i \u0001\u0002\u0003\uFFFE |
| 369 |
| 370 * compare |
| 371 # The merge separator must even compare less than U+0000. |
| 372 <1 \uFFFE\u0000\u0000 |
| 373 <i \u0000\uFFFE\u0000 |
| 374 <i \u0000\u0000\uFFFE |
| 375 |
| 376 ** test: Hani < surrogates < U+FFFD |
| 377 # Note: compareUTF8() treats unpaired surrogates like U+FFFD, |
| 378 # so with that the strings with surrogates will compare equal to each other |
| 379 # and equal to the string with U+FFFD. |
| 380 @ root |
| 381 % strength=identical |
| 382 * compare |
| 383 <1 abz |
| 384 <1 a\u4e00z |
| 385 <1 a\U00020000z |
| 386 <1 a\ud800z |
| 387 <1 a\udbffz |
| 388 <1 a\udc00z |
| 389 <1 a\udfffz |
| 390 <1 a\ufffdz |
| 391 |
| 392 ** test: script reordering |
| 393 @ root |
| 394 % reorder Hani Zzzz digit |
| 395 * compare |
| 396 <1 ? |
| 397 <1 + |
| 398 <1 丂 |
| 399 <1 a |
| 400 <1 α |
| 401 <1 5 |
| 402 |
| 403 % reorder default |
| 404 * compare |
| 405 <1 ? |
| 406 <1 + |
| 407 <1 5 |
| 408 <1 a |
| 409 <1 α |
| 410 <1 丂 |
| 411 |
| 412 ** test: empty rules |
| 413 @ rules |
| 414 * compare |
| 415 <1 a |
| 416 <2 ä |
| 417 <3 Ä |
| 418 <1 b |
| 419 |
| 420 ** test: very simple rules |
| 421 @ rules |
| 422 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z |
| 423 % strength=quaternary |
| 424 * compare |
| 425 <1 a |
| 426 = e |
| 427 <4 q |
| 428 <4 r |
| 429 <1 x |
| 430 <3 X |
| 431 <2 y |
| 432 <3 Y |
| 433 <2 z |
| 434 <3 Z |
| 435 |
| 436 ** test: tailoring twice before a root position: primary |
| 437 @ rules |
| 438 &[before 1]b<p |
| 439 &[before 1]b<q |
| 440 * compare |
| 441 <1 a |
| 442 <1 p |
| 443 <1 q |
| 444 <1 b |
| 445 |
| 446 ** test: tailoring twice before a root position: secondary |
| 447 @ rules |
| 448 &[before 2]ſ<<p |
| 449 &[before 2]ſ<<q |
| 450 * compare |
| 451 <1 s |
| 452 <2 p |
| 453 <2 q |
| 454 <2 ſ |
| 455 |
| 456 # secondary-before common weight |
| 457 @ rules |
| 458 &[before 2]b<<p |
| 459 &[before 2]b<<q |
| 460 * compare |
| 461 <1 a |
| 462 <1 p |
| 463 <2 q |
| 464 <2 b |
| 465 |
| 466 ** test: tailoring twice before a root position: tertiary |
| 467 @ rules |
| 468 &[before 3]B<<<p |
| 469 &[before 3]B<<<q |
| 470 * compare |
| 471 <1 b |
| 472 <3 p |
| 473 <3 q |
| 474 <3 B |
| 475 |
| 476 # tertiary-before common weight |
| 477 @ rules |
| 478 &[before 3]b<<<p |
| 479 &[before 3]b<<<q |
| 480 * compare |
| 481 <1 a |
| 482 <1 p |
| 483 <3 q |
| 484 <3 b |
| 485 |
| 486 @ rules |
| 487 &[before 2]b<<s |
| 488 &[before 3]s<<<p |
| 489 &[before 3]s<<<q |
| 490 * compare |
| 491 <1 a |
| 492 <1 p |
| 493 <3 q |
| 494 <3 s |
| 495 <2 b |
| 496 |
| 497 ** test: tailor after completely ignorable |
| 498 @ rules |
| 499 &\x00<<<x<<y |
| 500 * compare |
| 501 = \x00 |
| 502 = \x1F |
| 503 <3 x |
| 504 <2 y |
| 505 |
| 506 ** test: secondary tailoring gaps, ICU ticket 9362 |
| 507 @ rules |
| 508 &[before 2]s<<'_' |
| 509 &s<<r # secondary between s and ſ (long s) |
| 510 &ſ<<*a-q # more than 15 between ſ and secondary CE boundary |
| 511 &[before 2][first primary ignorable]<<u<<v # between secondary CE boundary & lowest secondary CE |
| 512 &[last primary ignorable]<<y<<z |
| 513 |
| 514 * compare |
| 515 <2 u |
| 516 <2 v |
| 517 <2 \u0332 # lowest secondary CE |
| 518 <2 \u0308 |
| 519 <2 y |
| 520 <2 z |
| 521 <1 s_ |
| 522 <2 ss |
| 523 <2 sr |
| 524 <2 sſ |
| 525 <2 sa |
| 526 <2 sb |
| 527 <2 sp |
| 528 <2 sq |
| 529 <2 sus |
| 530 <2 svs |
| 531 <2 rs |
| 532 |
| 533 ** test: tertiary tailoring gaps, ICU ticket 9362 |
| 534 @ rules |
| 535 &[before 3]t<<<'_' |
| 536 &t<<<r # tertiary between t and fullwidth t |
| 537 &ᵀ<<<*a-q # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary |
| 538 &[before 3][first secondary ignorable]<<<u<<<v # between tertiary CE boundary & lowest tertiary CE |
| 539 &[last secondary ignorable]<<<y<<<z |
| 540 |
| 541 * compare |
| 542 <3 u |
| 543 <3 v |
| 544 # Note: The root collator currently does not map any characters to tertiary CEs. |
| 545 <3 y |
| 546 <3 z |
| 547 <1 t_ |
| 548 <3 tt |
| 549 <3 tr |
| 550 <3 t\uFF54 # t + fullwidth t (U+FF54) |
| 551 <3 tᵀ |
| 552 <3 ta |
| 553 <3 tb |
| 554 <3 tp |
| 555 <3 tq |
| 556 <3 tut |
| 557 <3 tvt |
| 558 <3 rt |
| 559 |
| 560 ** test: secondary & tertiary around root character |
| 561 @ rules |
| 562 &[before 2]m<<r |
| 563 &m<<s |
| 564 &[before 3]m<<<u |
| 565 &m<<<v |
| 566 * compare |
| 567 <1 l |
| 568 <1 r |
| 569 <2 u |
| 570 <3 m |
| 571 <3 v |
| 572 <2 s |
| 573 <1 n |
| 574 |
| 575 ** test: secondary & tertiary around tailored item |
| 576 @ rules |
| 577 &m<x |
| 578 &[before 2]x<<r |
| 579 &x<<s |
| 580 &[before 3]x<<<u |
| 581 &x<<<v |
| 582 * compare |
| 583 <1 m |
| 584 <1 r |
| 585 <2 u |
| 586 <3 x |
| 587 <3 v |
| 588 <2 s |
| 589 <1 n |
| 590 |
| 591 ** test: more nesting of secondary & tertiary before |
| 592 @ rules |
| 593 &[before 3]m<<<u |
| 594 &[before 2]m<<r |
| 595 &[before 3]r<<<q |
| 596 &m<<<w |
| 597 &m<<t |
| 598 &[before 3]w<<<v |
| 599 &w<<<x |
| 600 &w<<s |
| 601 * compare |
| 602 <1 l |
| 603 <1 q |
| 604 <3 r |
| 605 <2 u |
| 606 <3 m |
| 607 <3 v |
| 608 <3 w |
| 609 <3 x |
| 610 <2 s |
| 611 <2 t |
| 612 <1 n |
| 613 |
| 614 ** test: case bits |
| 615 @ rules |
| 616 &w<x # tailored CE getting case bits |
| 617 =uv=uV=Uv=UV # 2 chars -> 1 CE |
| 618 &ae=ch=cH=Ch=CH # 2 chars -> 2 CEs |
| 619 &rst=yz=yZ=Yz=YZ # 2 chars -> 3 CEs |
| 620 % caseFirst=lower |
| 621 * compare |
| 622 <1 ae |
| 623 = ch |
| 624 <3 cH |
| 625 <3 Ch |
| 626 <3 CH |
| 627 <1 rst |
| 628 = yz |
| 629 <3 yZ |
| 630 <3 Yz |
| 631 <3 YZ |
| 632 <1 w |
| 633 <1 x |
| 634 = uv |
| 635 <3 uV |
| 636 = Uv # mixed case on single CE cannot distinguish variations |
| 637 <3 UV |
| 638 |
| 639 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower |
| 640 @ rules |
| 641 &\u0001<<<t<<<T # tertiary CEs |
| 642 % caseFirst=lower |
| 643 * compare |
| 644 <1 aa |
| 645 <3 aat |
| 646 <3 aaT |
| 647 <3 aA |
| 648 <3 aAt |
| 649 <3 ata |
| 650 <3 aTa |
| 651 |
| 652 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper |
| 653 % caseFirst=upper |
| 654 * compare |
| 655 <1 aA |
| 656 <3 aAt |
| 657 <3 aa |
| 658 <3 aat |
| 659 <3 aaT |
| 660 <3 ata |
| 661 <3 aTa |
| 662 |
| 663 ** test: reset on expansion, ICU tickets 9415 & 9593 |
| 664 @ rules |
| 665 &æ<x # tailor the last primary CE so that x sorts between ae and af |
| 666 &æb=bæ # copy all reset CEs to make bæ sort the same |
| 667 &각<h # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂 |
| 668 &⒀<<y # copy/tailor 4 CEs to make y sort with only a secondary difference |
| 669 &l·=z # handle the pre-context for · when fetching reset CEs |
| 670 <<u # copy/tailor 2 CEs |
| 671 |
| 672 * compare |
| 673 <1 ae |
| 674 <2 æ |
| 675 <1 x |
| 676 <1 af |
| 677 |
| 678 * compare |
| 679 <1 aeb |
| 680 <2 æb |
| 681 = bæ |
| 682 |
| 683 * compare |
| 684 <1 각 |
| 685 <1 h |
| 686 <1 갂 |
| 687 <1 갃 |
| 688 |
| 689 * compare |
| 690 <1 · # by itself: primary CE |
| 691 <1 l |
| 692 <2 l· # l+middle dot has only a secondary difference from l |
| 693 = z |
| 694 <2 u |
| 695 |
| 696 * compare |
| 697 <1 (13) |
| 698 <3 ⒀ # DUCET sets special tertiary weights in all CEs |
| 699 <2 y |
| 700 <1 (13[ |
| 701 |
| 702 % alternate=shifted |
| 703 * compare |
| 704 <1 (13) |
| 705 = 13 |
| 706 <3 ⒀ |
| 707 = y # alternate=shifted removes the tailoring difference on the last CE |
| 708 <1 14 |
| 709 |
| 710 ** test: contraction inside extension, ICU ticket 9378 |
| 711 @ rules |
| 712 &а<<х/й # all letters are Cyrillic |
| 713 * compare |
| 714 <1 ай |
| 715 <2 х |
| 716 |
| 717 ** test: no duplicate tailored CEs for different reset positions with same CEs,
ICU ticket 10104 |
| 718 @ rules |
| 719 &t<x &ᵀ<y # same primary weights |
| 720 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent |
| 721 * compare |
| 722 <1 q |
| 723 <1 u |
| 724 <1 v |
| 725 <1 ꝗ |
| 726 <1 t |
| 727 <3 ᵀ |
| 728 <1 y |
| 729 <1 x |
| 730 |
| 731 # Principle: Each rule builds on the state of preceding rules and ignores following rules. |
| 732 |
| 733 ** test: later rule does not affect earlier reset position, ICU ticket 10105 |
| 734 @ rules |
| 735 &a < u < v < w &ov < x &b < v |
| 736 * compare |
| 737 <1 oa |
| 738 <1 ou |
| 739 <1 x # CE(o) followed by CE between u and w |
| 740 <1 ow |
| 741 <1 ob |
| 742 <1 ov |
| 743 |
| 744 ** test: later rule does not affect earlier extension (1), ICU ticket 10105 |
| 745 @ rules |
| 746 &a=x/b &v=b |
| 747 % strength=secondary |
| 748 * compare |
| 749 <1 B |
| 750 <1 c |
| 751 <1 v |
| 752 = b |
| 753 * compare |
| 754 <1 AB |
| 755 = x |
| 756 <1 ac |
| 757 <1 av |
| 758 = ab |
| 759 |
| 760 ** test: later rule does not affect earlier extension (2), ICU ticket 10105 |
| 761 @ rules |
| 762 &a <<< c / e &g <<< e / l |
| 763 % strength=secondary |
| 764 * compare |
| 765 <1 AE |
| 766 = c |
| 767 <2 æ |
| 768 <1 agl |
| 769 = ae |
| 770 |
| 771 ** test: later rule does not affect earlier extension (3), ICU ticket 10105 |
| 772 @ rules |
| 773 &a = b / c &d = c / e |
| 774 % strength=secondary |
| 775 * compare |
| 776 <1 AC # C is still only tertiary different from the original c |
| 777 = b |
| 778 <1 ade |
| 779 = ac |
| 780 |
| 781 ** test: extension contains tailored character, ICU ticket 10105 |
| 782 @ rules |
| 783 &a=e &b=u/e |
| 784 * compare |
| 785 <1 a |
| 786 = e |
| 787 <1 ba |
| 788 = be |
| 789 = u |
| 790 |
| 791 ** test: add simple mappings for characters with root context |
| 792 @ rules |
| 793 &z=· # middle dot has a prefix mapping in the CLDR root |
| 794 &n=и # и (U+0438) has contractions in the root |
| 795 * compare |
| 796 <1 l |
| 797 <2 l· # root mapping for l|· still works |
| 798 <1 z |
| 799 = · |
| 800 * compare |
| 801 <1 n |
| 802 = и |
| 803 <1 И |
| 804 <1 и\u0306 # root mapping for й=и\u0306 still works |
| 805 = й |
| 806 <3 Й |
| 807 |
| 808 ** test: add context mappings around characters with root context |
| 809 @ rules |
| 810 &z=·h # middle dot has a prefix mapping in the CLDR root |
| 811 &n=ә|и # и (U+0438) has contractions in the root |
| 812 * compare |
| 813 <1 l |
| 814 <2 l· # root mapping for l|· still works |
| 815 <1 z |
| 816 = ·h |
| 817 * compare |
| 818 <1 и |
| 819 <3 И |
| 820 <1 и\u0306 # root mapping for й=и\u0306 still works |
| 821 = й |
| 822 * compare |
| 823 <1 әn |
| 824 = әи |
| 825 <1 әo |
| 826 |
| 827 ** test: many secondary CEs at the top of their range |
| 828 @ rules |
| 829 &[last primary ignorable]<<*\u2801-\u28ff |
| 830 * compare |
| 831 <2 \u0308 |
| 832 <2 \u2801 |
| 833 <2 \u2802 |
| 834 <2 \u2803 |
| 835 <2 \u2804 |
| 836 <2 \u28fd |
| 837 <2 \u28fe |
| 838 <2 \u28ff |
| 839 <1 \x20 |
| 840 |
| 841 ** test: many tertiary CEs at the top of their range |
| 842 @ rules |
| 843 &[last secondary ignorable]<<<*a-z |
| 844 * compare |
| 845 <3 a |
| 846 <3 b |
| 847 <3 c |
| 848 <3 d |
| 849 # e..w |
| 850 <3 x |
| 851 <3 y |
| 852 <3 z |
| 853 <2 \u0308 |
| 854 |
| 855 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 1
0101 |
| 856 @ rules |
| 857 &a=p|x &b=px &c=op |
| 858 * compare |
| 859 <1 b |
| 860 = px |
| 861 <3 B |
| 862 <1 c |
| 863 = op |
| 864 <3 C |
| 865 * compare |
| 866 <1 ca |
| 867 = opx # first contraction op, then prefix p|x |
| 868 <3 cA |
| 869 <3 Ca |
| 870 |
| 871 ** test: reset position with prefix (pre-context), ICU ticket 10102 |
| 872 @ rules |
| 873 &a=p|x &px=y |
| 874 * compare |
| 875 <1 pa |
| 876 = px |
| 877 = y |
| 878 <3 pA |
| 879 <1 q |
| 880 <1 x |
| 881 |
| 882 ** test: prefix+contraction together (1), ICU ticket 10071 |
| 883 @ rules |
| 884 &x=a|bc |
| 885 * compare |
| 886 <1 ab |
| 887 <1 Abc |
| 888 <1 abd |
| 889 <1 ac |
| 890 <1 aw |
| 891 <1 ax |
| 892 = abc |
| 893 <3 aX |
| 894 <3 Ax |
| 895 <1 b |
| 896 <1 bb |
| 897 <1 bc |
| 898 <3 bC |
| 899 <3 Bc |
| 900 <1 bd |
| 901 |
| 902 ** test: prefix+contraction together (2), ICU ticket 10071 |
| 903 @ rules |
| 904 &w=bc &x=a|b |
| 905 * compare |
| 906 <1 w |
| 907 = bc |
| 908 <3 W |
| 909 * compare |
| 910 <1 aw |
| 911 <1 ax |
| 912 = ab |
| 913 <3 aX |
| 914 <1 axb |
| 915 <1 axc |
| 916 = abc # prefix match a|b takes precedence over contraction match bc |
| 917 <3 abC |
| 918 <1 abd |
| 919 <1 ay |
| 920 |
| 921 ** test: prefix+contraction together (3), ICU ticket 10071 |
| 922 @ rules |
| 923 &x=a|b &w=bc # reverse order of rules as previous test, order should not matter here |
| 924 * compare # same "compare" sequences as previous test |
| 925 <1 w |
| 926 = bc |
| 927 <3 W |
| 928 * compare |
| 929 <1 aw |
| 930 <1 ax |
| 931 = ab |
| 932 <3 aX |
| 933 <1 axb |
| 934 <1 axc |
| 935 = abc # prefix match a|b takes precedence over contraction match bc |
| 936 <3 abC |
| 937 <1 abd |
| 938 <1 ay |
| 939 |
| 940 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962 |
| 941 @ rules |
| 942 &d=ch &v=p|ci |
| 943 * compare |
| 944 <1 pc |
| 945 <3 pC |
| 946 <1 pcH |
| 947 <1 pcI |
| 948 <1 pd |
| 949 = pch # no-prefix contraction ch matches |
| 950 <3 pD |
| 951 <1 pv |
| 952 = pci # prefix+contraction p|ci matches |
| 953 <3 pV |
| 954 |
| 955 ** test: tailor in & around compact ranges of root primaries |
| 956 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs |
| 957 # which should be reliably encoded as one range in the root elements data. |
| 958 @ rules |
| 959 &[before 1]ᚁ<a |
| 960 &ᚁ<b |
| 961 &[before 1]ᚂ<c |
| 962 &ᚂ<d |
| 963 &[before 1]ᚚ<y |
| 964 &ᚚ<z |
| 965 &[before 2]ᚁ<<r |
| 966 &ᚁ<<s |
| 967 &[before 3]ᚚ<<<t |
| 968 &ᚚ<<<u |
| 969 * compare |
| 970 <1 ᣵ # U+18F5 last Canadian Aboriginal |
| 971 <1 a |
| 972 <1 r |
| 973 <2 ᚁ |
| 974 <2 s |
| 975 <1 b |
| 976 <1 c |
| 977 <1 ᚂ |
| 978 <1 d |
| 979 <1 ᚃ |
| 980 <1 ᚙ |
| 981 <1 y |
| 982 <1 t |
| 983 <3 ᚚ |
| 984 <3 u |
| 985 <1 z |
| 986 <1 ᚠ # U+16A0 first Runic |
| 987 |
| 988 ** test: suppressContractions |
| 989 @ rules |
| 990 &z<ch<әж [suppressContractions [·cә]] |
| 991 * compare |
| 992 <1 ch |
| 993 <3 cH # ch was suppressed |
| 994 <1 l |
| 995 <1 l· # primary difference, not secondary, because l|· was suppressed |
| 996 <1 ә |
| 997 <2 ә\u0308 # secondary difference, not primary, because contractions for ә were
suppressed |
| 998 <1 әж |
| 999 <3 әЖ |
| 1000 |
| 1001 ** test: Hangul & Jamo |
| 1002 @ rules |
| 1003 &L=\u1100 # first Jamo L |
| 1004 &V=\u1161 # first Jamo V |
| 1005 &T=\u11A8 # first Jamo T |
| 1006 &\uAC01<<*\u4E00-\u4EFF # first Hangul LVT syllable & lots of secondary diffs |
| 1007 * compare |
| 1008 <1 Lv |
| 1009 <3 LV |
| 1010 = \u1100\u1161 |
| 1011 = \uAC00 |
| 1012 <1 LVt |
| 1013 <3 LVT |
| 1014 = \u1100\u1161\u11A8 |
| 1015 = \uAC00\u11A8 |
| 1016 = \uAC01 |
| 1017 <2 LVT\u0308 |
| 1018 <2 \u4E00 |
| 1019 <2 \u4E01 |
| 1020 <2 \u4E80 |
| 1021 <2 \u4EFF |
| 1022 <2 LV\u0308T |
| 1023 <1 \uAC02 |
| 1024 |
| 1025 ** test: adjust special reset positions according to previous rules, CLDR ticket
6070 |
| 1026 @ rules |
| 1027 &[last variable]<x |
| 1028 [maxVariable space] # has effect only after building, no effect on following ru
les |
| 1029 &[last variable]<y |
| 1030 &[before 1][first regular]<z |
| 1031 * compare |
| 1032 <1 ? # some punctuation |
| 1033 <1 x |
| 1034 <1 y |
| 1035 <1 z |
| 1036 <1 $ # some symbol |
| 1037 |
| 1038 @ rules |
| 1039 &[last primary ignorable]<<x<<<y |
| 1040 &[last primary ignorable]<<z |
| 1041 * compare |
| 1042 <2 \u0358 |
| 1043 <2 x |
| 1044 <3 y |
| 1045 <2 z |
| 1046 <1 \x20 |
| 1047 |
| 1048 @ rules |
| 1049 &[last secondary ignorable]<<<x |
| 1050 &[last secondary ignorable]<<<y |
| 1051 * compare |
| 1052 <3 x |
| 1053 <3 y |
| 1054 <2 \u0358 |
| 1055 |
| 1056 @ rules |
| 1057 &[before 2][first variable]<<z |
| 1058 &[before 2][first variable]<<y |
| 1059 &[before 3][first variable]<<<x |
| 1060 &[before 3][first variable]<<<w |
| 1061 &[before 1][first variable]<v |
| 1062 &[before 2][first variable]<<u |
| 1063 &[before 3][first variable]<<<t |
| 1064 &[before 2]\uFDD1\xA0<<s # FractionalUCA.txt: FDD1 00A0, SPACE first primary |
| 1065 * compare |
| 1066 <2 \u0358 |
| 1067 <1 s |
| 1068 <2 \uFDD1\xA0 |
| 1069 <1 t |
| 1070 <3 u |
| 1071 <2 v |
| 1072 <1 w |
| 1073 <3 x |
| 1074 <3 y |
| 1075 <2 z |
| 1076 <2 \t |
| 1077 |
| 1078 @ rules |
| 1079 &[before 2][first regular]<<z |
| 1080 &[before 3][first regular]<<<y |
| 1081 &[before 1][first regular]<x |
| 1082 &[before 3][first regular]<<<w |
| 1083 &[before 2]\uFDD1\u263A<<v # FractionalUCA.txt: FDD1 263A, SYMBOL first primary |
| 1084 &[before 3][first regular]<<<u |
| 1085 &[before 1][first regular]<p # primary before the boundary: becomes variable |
| 1086 &[before 3][first regular]<<<t # not affected by p |
| 1087 &[last variable]<q # after p! |
| 1088 * compare |
| 1089 <1 ? |
| 1090 <1 p |
| 1091 <1 q |
| 1092 <1 t |
| 1093 <3 u |
| 1094 <3 v |
| 1095 <1 w |
| 1096 <3 x |
| 1097 <1 y |
| 1098 <3 z |
| 1099 <1 $ |
| 1100 |
| 1101 # check that p & q are indeed variable |
| 1102 % alternate=shifted |
| 1103 * compare |
| 1104 = ? |
| 1105 = p |
| 1106 = q |
| 1107 <1 t |
| 1108 <3 u |
| 1109 <3 v |
| 1110 <1 w |
| 1111 <3 x |
| 1112 <1 y |
| 1113 <3 z |
| 1114 <1 $ |
| 1115 |
| 1116 @ rules |
| 1117 &[before 2][first trailing]<<z |
| 1118 &[before 1][first trailing]<y |
| 1119 &[before 3][first trailing]<<<x |
| 1120 * compare |
| 1121 <1 \u4E00 # first Han, first implicit |
| 1122 <1 \uFDD1\uFDD0 # FractionalUCA.txt: unassigned first primary |
| 1123 # Note: The root collator currently does not map any characters to the trailing
first boundary primary. |
| 1124 <1 x |
| 1125 <3 y |
| 1126 <1 z |
| 1127 <2 \uFFFD # The root collator currently maps U+FFFD to the first real trailing
primary. |
| 1128 |
| 1129 @ rules |
| 1130 &[before 2][first primary ignorable]<<z |
| 1131 &[before 2][first primary ignorable]<<y |
| 1132 &[before 3][first primary ignorable]<<<x |
| 1133 &[before 3][first primary ignorable]<<<w |
| 1134 * compare |
| 1135 = \x01 |
| 1136 <2 w |
| 1137 <3 x |
| 1138 <3 y |
| 1139 <2 z |
| 1140 <2 \u0301 |
| 1141 |
| 1142 @ rules |
| 1143 &[before 3][first secondary ignorable]<<<y |
| 1144 &[before 3][first secondary ignorable]<<<x |
| 1145 * compare |
| 1146 = \x01 |
| 1147 <3 x |
| 1148 <3 y |
| 1149 <2 \u0301 |
| 1150 |
| 1151 ** test: canonical closure |
| 1152 @ rules |
| 1153 &X=A &U=Â |
| 1154 * compare |
| 1155 <1 U |
| 1156 = Â |
| 1157 = A\u0302 |
| 1158 <2 Ú # U with acute |
| 1159 = U\u0301 |
| 1160 = Ấ # A with circumflex & acute |
| 1161 = Â\u0301 |
| 1162 = A\u0302\u0301 |
| 1163 <1 X |
| 1164 = A |
| 1165 <2 X\u030A # with ring above |
| 1166 = Å |
| 1167 = A\u030A |
| 1168 = \u212B # Angstrom sign |
| 1169 |
| 1170 @ rules |
| 1171 &x=\u5140\u55C0 |
| 1172 * compare |
| 1173 <1 x |
| 1174 = \u5140\u55C0 |
| 1175 = \u5140\uFA0D |
| 1176 = \uFA0C\u55C0 |
| 1177 = \uFA0C\uFA0D # CJK compatibility characters |
| 1178 <3 X |
| 1179 |
| 1180 # canonical closure on prefix rules, ICU ticket 9444 |
| 1181 @ rules |
| 1182 &x=ä|ŝ |
| 1183 * compare |
| 1184 <1 äs # not tailored |
| 1185 <1 äx |
| 1186 = äŝ |
| 1187 = a\u0308s\u0302 |
| 1188 = a\u0308ŝ |
| 1189 = äs\u0302 |
| 1190 <3 äX |
| 1191 |
| 1192 ** test: conjoining Jamo map to expansions |
| 1193 @ rules |
| 1194 &gg=\u1101 # Jamo Lead consonant GG |
| 1195 &nj=\u11AC # Jamo Trail consonant NJ |
| 1196 * compare |
| 1197 <1 gg\u1161nj |
| 1198 = \u1101\u1161\u11AC |
| 1199 = \uAE4C\u11AC |
| 1200 = \uAE51 |
| 1201 <3 gg\u1161nJ |
| 1202 <1 \u1100\u1100 |
| 1203 |
| 1204 ** test: canonical tail closure, ICU ticket 5913 |
| 1205 @ rules |
| 1206 &a<â |
| 1207 * compare |
| 1208 <1 a |
| 1209 <1 â # tailored |
| 1210 = a\u0302 |
| 1211 <2 a\u0323\u0302 # discontiguous contraction |
| 1212 = ạ\u0302 # equivalent |
| 1213 = ậ # equivalent |
| 1214 <1 b |
| 1215 |
| 1216 @ rules |
| 1217 &a<ạ |
| 1218 * compare |
| 1219 <1 a |
| 1220 <1 ạ # tailored |
| 1221 = a\u0323 |
| 1222 <2 a\u0323\u0302 # contiguous contraction plus extra diacritic |
| 1223 = ạ\u0302 # equivalent |
| 1224 = ậ # equivalent |
| 1225 <1 b |
| 1226 |
| 1227 # Tail closure should work even if there is a prefix and/or contraction. |
| 1228 @ rules |
| 1229 &a<\u5140|câ |
| 1230 # In order to find discontiguous contractions for \u5140|câ |
| 1231 # there must exist a mapping for \u5140|ca, regardless of what it maps to. |
| 1232 # (This follows from the UCA spec.) |
| 1233 &x=\u5140|ca |
| 1234 * compare |
| 1235 <1 \u5140a |
| 1236 = \uFA0Ca |
| 1237 <1 \u5140câ # tailored |
| 1238 = \uFA0Ccâ |
| 1239 = \u5140ca\u0302 |
| 1240 = \uFA0Cca\u0302 |
| 1241 <2 \u5140ca\u0323\u0302 # discontiguous contraction |
| 1242 = \uFA0Cca\u0323\u0302 |
| 1243 = \u5140cạ\u0302 |
| 1244 = \uFA0Ccạ\u0302 |
| 1245 = \u5140cậ |
| 1246 = \uFA0Ccậ |
| 1247 <1 \u5140b |
| 1248 = \uFA0Cb |
| 1249 <1 \u5140x |
| 1250 = \u5140ca |
| 1251 |
| 1252 # Double-check that without the extra mapping there will be no discontiguous match. |
| 1253 @ rules |
| 1254 &a<\u5140|câ |
| 1255 * compare |
| 1256 <1 \u5140a |
| 1257 = \uFA0Ca |
| 1258 <1 \u5140câ # tailored |
| 1259 = \uFA0Ccâ |
| 1260 = \u5140ca\u0302 |
| 1261 = \uFA0Cca\u0302 |
| 1262 <1 \u5140b |
| 1263 = \uFA0Cb |
| 1264 <1 \u5140ca\u0323\u0302 # no discontiguous contraction |
| 1265 = \uFA0Cca\u0323\u0302 |
| 1266 = \u5140cạ\u0302 |
| 1267 = \uFA0Ccạ\u0302 |
| 1268 = \u5140cậ |
| 1269 = \uFA0Ccậ |
| 1270 |
| 1271 @ rules |
| 1272 &a<cạ |
| 1273 * compare |
| 1274 <1 a |
| 1275 <1 cạ # tailored |
| 1276 = ca\u0323 |
| 1277 <2 ca\u0323\u0302 # contiguous contraction plus extra diacritic |
| 1278 = cạ\u0302 # equivalent |
| 1279 = cậ # equivalent |
| 1280 <1 b |
| 1281 |
| 1282 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI |
| 1283 # = 03C9 0313 0300 0345 |
| 1284 # ccc = 0, 230, 230, 240 |
| 1285 @ rules |
| 1286 &δ=αῳ |
| 1287 # In order to find discontiguous contractions for αῳ |
| 1288 # there must exist a mapping for αω, regardless of what it maps to. |
| 1289 # (This follows from the UCA spec.) |
| 1290 &ε=αω |
| 1291 * compare |
| 1292 <1 δ |
| 1293 = αῳ |
| 1294 = αω\u0345 |
| 1295 <2 αω\u0313\u0300\u0345 # discontiguous contraction |
| 1296 = αὠ\u0300\u0345 |
| 1297 = αὢ\u0345 |
| 1298 = αᾢ |
| 1299 <2 αω\u0300\u0313\u0345 |
| 1300 = αὼ\u0313\u0345 |
| 1301 = αῲ\u0313 # not FCD |
| 1302 <1 ε |
| 1303 = αω |
| 1304 |
| 1305 # Double-check that without the extra mapping there will be no discontiguous match. |
| 1306 @ rules |
| 1307 &δ=αῳ |
| 1308 * compare |
| 1309 <1 αω\u0313\u0300\u0345 # no discontiguous contraction |
| 1310 = αὠ\u0300\u0345 |
| 1311 = αὢ\u0345 |
| 1312 = αᾢ |
| 1313 <2 αω\u0300\u0313\u0345 |
| 1314 = αὼ\u0313\u0345 |
| 1315 = αῲ\u0313 # not FCD |
| 1316 <1 δ |
| 1317 = αῳ |
| 1318 = αω\u0345 |
| 1319 |
| 1320 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232. |
| 1321 # Tests code paths where the tailored string has a combining mark |
| 1322 # that does not occur in any composite's decomposition. |
| 1323 @ rules |
| 1324 &δ=αὼ\u0315 |
| 1325 * compare |
| 1326 <1 αω\u0313\u0300\u0315 # Not tailored: The grave accent blocks the comma above
. |
| 1327 = αὠ\u0300\u0315 |
| 1328 = αὢ\u0315 |
| 1329 <1 δ |
| 1330 = αὼ\u0315 |
| 1331 = αω\u0300\u0315 |
| 1332 <2 αω\u0300\u0315\u0345 |
| 1333 = αὼ\u0315\u0345 |
| 1334 = αῲ\u0315 # not FCD |
| 1335 |
| 1336 ** test: danish a+a vs. a-umlaut, ICU ticket 9319 |
| 1337 @ rules |
| 1338 &z<aa |
| 1339 * compare |
| 1340 <1 z |
| 1341 <1 aa |
| 1342 <2 aa\u0308 |
| 1343 = aä |
| 1344 |
| 1345 ** test: Jamo L with and in prefix |
| 1346 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L). |
| 1347 @ rules |
| 1348 # Jamo Lead consonant G after G or GG |
| 1349 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100 |
| 1350 # Jamo Lead consonant GG sorts like G+G |
| 1351 &\u1100\u1100=\u1101 |
| 1352 # Note: Making G|GG and GG|GG sort the same as G|G+G |
| 1353 # would require the ability to reset on G|G+G, |
| 1354 # or we could make G-after-G equal to some secondary-CE character, |
| 1355 # and reset on a pair of those. |
| 1356 # (It does not matter much if there are at most two G in a row in real text.) |
| 1357 * compare |
| 1358 <1 \u1100 |
| 1359 <2 \u1100\u1100 # only one primary from a sequence of G lead consonants |
| 1360 = \u1101 |
| 1361 <2 \u1100\u1100\u1100 |
| 1362 = \u1101\u1100 |
| 1363 # but not = \u1100\u1101, see above |
| 1364 <1 \u1100\u1161 |
| 1365 = \uAC00 |
| 1366 <2 \u1100\u1100\u1161 |
| 1367 = \u1100\uAC00 # prefix match from the L of the LV syllable |
| 1368 = \u1101\u1161 |
| 1369 = \uAE4C |
| 1370 |
| 1371 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546 |
| 1372 @ rules |
| 1373 # Low secondary CEs for Jamo V & T. |
| 1374 # Note: T should sort before V for proper syllable order. |
| 1375 &\u0332 # COMBINING LOW LINE (first primary ignorable) |
| 1376 <<\u1161<<\u1162 |
| 1377 |
| 1378 # Korean Jamo lead consonant search rules, part 2: |
| 1379 # Make modern compound L jamo primary equivalent to non-compound forms. |
| 1380 |
| 1381 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T. |
| 1382 &\u0313 # COMBINING COMMA ABOVE (second primary ignorable) |
| 1383 =\u1100|\u1100 |
| 1384 =\u1103|\u1103 |
| 1385 =\u1107|\u1107 |
| 1386 =\u1109|\u1109 |
| 1387 =\u110C|\u110C |
| 1388 |
| 1389 # Compound L Jamo map to equivalent expansions of primary+secondary CE. |
| 1390 &\u1100\u0313=\u1101<<<\u3132 # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSAN
GKIYEOK |
| 1391 &\u1103\u0313=\u1104<<<\u3138 # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSAN
GTIKEUT |
| 1392 &\u1107\u0313=\u1108<<<\u3143 # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANG
PIEUP |
| 1393 &\u1109\u0313=\u110A<<<\u3146 # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGS
IOS |
| 1394 &\u110C\u0313=\u110D<<<\u3149 # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANG
CIEUC |
| 1395 |
| 1396 * compare |
| 1397 <1 \u1100\u1161 |
| 1398 = \uAC00 |
| 1399 <2 \u1100\u1162 |
| 1400 = \uAC1C |
| 1401 <2 \u1100\u1100\u1161 |
| 1402 = \u1100\uAC00 |
| 1403 = \u1101\u1161 |
| 1404 = \uAE4C |
| 1405 <3 \u3132\u1161 |
| 1406 |
| 1407 ** test: Hangul syllables in prefix & in the interior of a contraction |
| 1408 @ rules |
| 1409 &x=\u1100\u1161|a\u1102\u1162z |
| 1410 * compare |
| 1411 <1 \u1100\u1161x |
| 1412 = \u1100\u1161a\u1102\u1162z |
| 1413 = \u1100\u1161a\uB0B4z |
| 1414 = \uAC00a\u1102\u1162z |
| 1415 = \uAC00a\uB0B4z |
| 1416 |
| 1417 ** test: digits are unsafe-backwards when numeric=on |
| 1418 @ root |
| 1419 % numeric=on |
| 1420 * compare |
| 1421 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a". |
| 1422 # We need to back up before the identical prefix "1" and compare the full numbers. |
| 1423 <1 11b |
| 1424 <1 101a |
| 1425 |
| 1426 ** test: simple locale data test |
| 1427 @ locale de |
| 1428 * compare |
| 1429 <1 a |
| 1430 <2 ä |
| 1431 <1 ae |
| 1432 <2 æ |
| 1433 |
| 1434 @ locale de-u-co-phonebk |
| 1435 * compare |
| 1436 <1 a |
| 1437 <1 ae |
| 1438 <2 ä |
| 1439 <2 æ |
| 1440 |
| 1441 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt. |
| 1442 |
| 1443 ** test: DataDrivenCollationTest/TestMorePinyin |
| 1444 # Testing the primary strength. |
| 1445 @ locale zh |
| 1446 % strength=primary |
| 1447 * compare |
| 1448 < lā |
| 1449 = lĀ |
| 1450 = Lā |
| 1451 = LĀ |
| 1452 < lān |
| 1453 = lĀn |
| 1454 < lē |
| 1455 = lĒ |
| 1456 = Lē |
| 1457 = LĒ |
| 1458 < lēn |
| 1459 = lĒn |
| 1460 |
| 1461 ** test: DataDrivenCollationTest/TestLithuanian |
| 1462 # Lithuanian sort order. |
| 1463 @ locale lt |
| 1464 * compare |
| 1465 < cz |
| 1466 < č |
| 1467 < d |
| 1468 < iz |
| 1469 < j |
| 1470 < sz |
| 1471 < š |
| 1472 < t |
| 1473 < zz |
| 1474 < ž |
| 1475 |
| 1476 ** test: DataDrivenCollationTest/TestLatvian |
| 1477 # Latvian sort order. |
| 1478 @ locale lv |
| 1479 * compare |
| 1480 < cz |
| 1481 < č |
| 1482 < d |
| 1483 < gz |
| 1484 < ģ |
| 1485 < h |
| 1486 < iz |
| 1487 < j |
| 1488 < kz |
| 1489 < ķ |
| 1490 < l |
| 1491 < lz |
| 1492 < ļ |
| 1493 < m |
| 1494 < nz |
| 1495 < ņ |
| 1496 < o |
| 1497 < rz |
| 1498 < ŗ |
| 1499 < s |
| 1500 < sz |
| 1501 < š |
| 1502 < t |
| 1503 < zz |
| 1504 < ž |
| 1505 |
| 1506 ** test: DataDrivenCollationTest/TestEstonian |
| 1507 # Estonian sort order. |
| 1508 @ locale et |
| 1509 * compare |
| 1510 < sy |
| 1511 < š |
| 1512 < šy |
| 1513 < z |
| 1514 < zy |
| 1515 < ž |
| 1516 < v |
| 1517 < va |
| 1518 < w |
| 1519 < õ |
| 1520 < õy |
| 1521 < ä |
| 1522 < äy |
| 1523 < ö |
| 1524 < öy |
| 1525 < ü |
| 1526 < üy |
| 1527 < x |
| 1528 |
| 1529 ** test: DataDrivenCollationTest/TestAlbanian |
| 1530 # Albanian sort order. |
| 1531 @ locale sq |
| 1532 * compare |
| 1533 < cz |
| 1534 < ç |
| 1535 < d |
| 1536 < dz |
| 1537 < dh |
| 1538 < e |
| 1539 < ez |
| 1540 < ë |
| 1541 < f |
| 1542 < gz |
| 1543 < gj |
| 1544 < h |
| 1545 < lz |
| 1546 < ll |
| 1547 < m |
| 1548 < nz |
| 1549 < nj |
| 1550 < o |
| 1551 < rz |
| 1552 < rr |
| 1553 < s |
| 1554 < sz |
| 1555 < sh |
| 1556 < t |
| 1557 < tz |
| 1558 < th |
| 1559 < u |
| 1560 < xz |
| 1561 < xh |
| 1562 < y |
| 1563 < zz |
| 1564 < zh |
| 1565 |
| 1566 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder |
| 1567 # Sorted file has different order. |
| 1568 @ root |
| 1569 # normalization=on turned on & off automatically. |
| 1570 * compare |
| 1571 < \u5F20 |
| 1572 < \u5F20\u4E00\u8E3F |
| 1573 |
| 1574 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash |
| 1575 # This pretty much crashes. |
| 1576 @ root |
| 1577 * compare |
| 1578 < \u0f71\u0f72\u0f80\u0f71\u0f72 |
| 1579 < \u0f80 |
| 1580 |
| 1581 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems |
| 1582 # These are examples of strings that caused trouble in partial sort key testing. |
| 1583 @ locale th-TH |
| 1584 * compare |
| 1585 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C |
| 1586 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18 |
| 1587 * compare |
| 1588 < \u0E01\u0E07\u0E01\u0E32\u0E23 |
| 1589 < \u0E01\u0E07\u0E42\u0E01\u0E49 |
| 1590 * compare |
| 1591 < \u0E01\u0E23\u0E19\u0E17\u0E32 |
| 1592 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32 |
| 1593 * compare |
| 1594 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27 |
| 1595 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27 |
| 1596 * compare |
| 1597 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D |
| 1598 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32 |
| 1599 |
| 1600 ** test: DataDrivenCollationTest/TestJavaStyleRule |
| 1601 # java.text allows rules to start as '<<<x<<<y...' |
| 1602 # we emulate this by assuming a &[first tertiary ignorable] in this case. |
| 1603 @ rules |
| 1604 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable
]=b |
| 1605 * compare |
| 1606 = a |
| 1607 = equal |
| 1608 < z |
| 1609 < x |
| 1610 = b # x had become the new first primary ignorable |
| 1611 < w |
| 1612 |
| 1613 ** test: DataDrivenCollationTest/TestShiftedIgnorable |
| 1614 # The UCA states that primary ignorables should be completely |
| 1615 # ignorable when following a shifted code point. |
| 1616 @ root |
| 1617 % alternate=shifted |
| 1618 % strength=quaternary |
| 1619 * compare |
| 1620 < a\u0020b |
| 1621 = a\u0020\u0300b |
| 1622 = a\u0020\u0301b |
| 1623 < a_b |
| 1624 = a_\u0300b |
| 1625 = a_\u0301b |
| 1626 < A\u0020b |
| 1627 = A\u0020\u0300b |
| 1628 = A\u0020\u0301b |
| 1629 < A_b |
| 1630 = A_\u0300b |
| 1631 = A_\u0301b |
| 1632 < a\u0301b |
| 1633 < A\u0301b |
| 1634 < a\u0300b |
| 1635 < A\u0300b |
| 1636 |
| 1637 ** test: DataDrivenCollationTest/TestNShiftedIgnorable |
| 1638 # The UCA states that primary ignorables are completely ignorable only when |
| 1639 # following a shifted code point; with alternate=non-ignorable nothing is shifted. |
| 1640 @ root |
| 1641 % alternate=non-ignorable |
| 1642 % strength=tertiary |
| 1643 * compare |
| 1644 < a\u0020b |
| 1645 < A\u0020b |
| 1646 < a\u0020\u0301b |
| 1647 < A\u0020\u0301b |
| 1648 < a\u0020\u0300b |
| 1649 < A\u0020\u0300b |
| 1650 < a_b |
| 1651 < A_b |
| 1652 < a_\u0301b |
| 1653 < A_\u0301b |
| 1654 < a_\u0300b |
| 1655 < A_\u0300b |
| 1656 < a\u0301b |
| 1657 < A\u0301b |
| 1658 < a\u0300b |
| 1659 < A\u0300b |
| 1660 |
| 1661 ** test: DataDrivenCollationTest/TestSafeSurrogates |
| 1662 # It turned out that surrogates were not skipped properly |
| 1663 # when iterating backwards if they were in the middle of a |
| 1664 # contraction. This test assures that this is fixed. |
| 1665 @ rules |
| 1666 &a < x\ud800\udc00b |
| 1667 * compare |
| 1668 < a |
| 1669 < x\ud800\udc00b |
| 1670 |
| 1671 ** test: DataDrivenCollationTest/da_TestPrimary |
| 1672 # This test goes through primary strength cases |
| 1673 @ locale da |
| 1674 % strength=primary |
| 1675 * compare |
| 1676 < Lvi |
| 1677 < Lwi |
| 1678 * compare |
| 1679 < L\u00e4vi |
| 1680 < L\u00f6wi |
| 1681 * compare |
| 1682 < L\u00fcbeck |
| 1683 = Lybeck |
| 1684 |
| 1685 ** test: DataDrivenCollationTest/da_TestTertiary |
| 1686 # This test goes through tertiary strength cases |
| 1687 @ locale da |
| 1688 % strength=tertiary |
| 1689 * compare |
| 1690 < Luc |
| 1691 < luck |
| 1692 * compare |
| 1693 < luck |
| 1694 < L\u00fcbeck |
| 1695 * compare |
| 1696 < lybeck |
| 1697 < L\u00fcbeck |
| 1698 * compare |
| 1699 < L\u00e4vi |
| 1700 < L\u00f6we |
| 1701 * compare |
| 1702 < L\u00f6ww |
| 1703 < mast |
| 1704 |
| 1705 * compare |
| 1706 < A/S |
| 1707 < ANDRE |
| 1708 < ANDR\u00c9 |
| 1709 < ANDREAS |
| 1710 < AS |
| 1711 < CA |
| 1712 < \u00c7A |
| 1713 < CB |
| 1714 < \u00c7C |
| 1715 < D.S.B. |
| 1716 < DA |
| 1717 < \u00d0A |
| 1718 < DB |
| 1719 < \u00d0C |
| 1720 < DSB |
| 1721 < DSC |
| 1722 < EKSTRA_ARBEJDE |
| 1723 < EKSTRABUD0 |
| 1724 < H\u00d8ST |
| 1725 < HAAG |
| 1726 < H\u00c5NDBOG |
| 1727 < HAANDV\u00c6RKSBANKEN |
| 1728 < Karl |
| 1729 < karl |
| 1730 < NIELS\u0020J\u00d8RGEN |
| 1731 < NIELS-J\u00d8RGEN |
| 1732 < NIELSEN |
| 1733 < R\u00c9E,\u0020A |
| 1734 < REE,\u0020B |
| 1735 < R\u00c9E,\u0020L |
| 1736 < REE,\u0020V |
| 1737 < SCHYTT,\u0020B |
| 1738 < SCHYTT,\u0020H |
| 1739 < SCH\u00dcTT,\u0020H |
| 1740 < SCHYTT,\u0020L |
| 1741 < SCH\u00dcTT,\u0020M |
| 1742 < SS |
| 1743 < \u00df |
| 1744 < SSA |
| 1745 < STORE\u0020VILDMOSE |
| 1746 < STOREK\u00c6R0 |
| 1747 < STORM\u0020PETERSEN |
| 1748 < STORMLY |
| 1749 < THORVALD |
| 1750 < THORVARDUR |
| 1751 < \u00feORVAR\u00d0UR |
| 1752 < THYGESEN |
| 1753 < VESTERG\u00c5RD,\u0020A |
| 1754 < VESTERGAARD,\u0020A |
| 1755 < VESTERG\u00c5RD,\u0020B |
| 1756 < \u00c6BLE |
| 1757 < \u00c4BLE |
| 1758 < \u00d8BERG |
| 1759 < \u00d6BERG |
| 1760 |
| 1761 * compare |
| 1762 < andere |
| 1763 < chaque |
| 1764 < chemin |
| 1765 < cote |
| 1766 < cot\u00e9 |
| 1767 < c\u00f4te |
| 1768 < c\u00f4t\u00e9 |
| 1769 < \u010du\u010d\u0113t |
| 1770 < Czech |
| 1771 < hi\u0161a |
| 1772 < irdisch |
| 1773 < lie |
| 1774 < lire |
| 1775 < llama |
| 1776 < l\u00f5ug |
| 1777 < l\u00f2za |
| 1778 < lu\u010d |
| 1779 < luck |
| 1780 < L\u00fcbeck |
| 1781 < lye |
| 1782 < l\u00e4vi |
| 1783 < L\u00f6wen |
| 1784 < m\u00e0\u0161ta |
| 1785 < m\u00eer |
| 1786 < myndig |
| 1787 < M\u00e4nner |
| 1788 < m\u00f6chten |
| 1789 < pi\u00f1a |
| 1790 < pint |
| 1791 < pylon |
| 1792 < \u0161\u00e0ran |
| 1793 < savoir |
| 1794 < \u0160erb\u016bra |
| 1795 < Sietla |
| 1796 < \u015blub |
| 1797 < subtle |
| 1798 < symbol |
| 1799 < s\u00e4mtlich |
| 1800 < verkehrt |
| 1801 < vox |
| 1802 < v\u00e4ga |
| 1803 < waffle |
| 1804 < wood |
| 1805 < yen |
| 1806 < yuan |
| 1807 < yucca |
| 1808 < \u017eal |
| 1809 < \u017eena |
| 1810 < \u017den\u0113va |
| 1811 < zoo0 |
| 1812 < Zviedrija |
| 1813 < Z\u00fcrich |
| 1814 < zysk0 |
| 1815 < \u00e4ndere |
| 1816 |
| 1817 ** test: DataDrivenCollationTest/hi_TestNewRules |
| 1818 # This test goes through new rules and tests against old rules |
| 1819 @ locale hi |
| 1820 * compare |
| 1821 < कॐ |
| 1822 < कं |
| 1823 < कँ |
| 1824 < कः |
| 1825 |
| 1826 ** test: DataDrivenCollationTest/ro_TestNewRules |
| 1827 # This test goes through new rules and tests against old rules |
| 1828 @ locale ro |
| 1829 * compare |
| 1830 < xAx |
| 1831 < xă |
| 1832 < xĂ |
| 1833 < Xă |
| 1834 < XĂ |
| 1835 < xăx |
| 1836 < xĂx |
| 1837 < xâ |
| 1838 < x |
| 1839 < Xâ |
| 1840 < XÂ |
| 1841 < xâx |
| 1842 < xÂx |
| 1843 < xb |
| 1844 < xIx |
| 1845 < xî |
| 1846 < xÎ |
| 1847 < Xî |
| 1848 < XÎ |
| 1849 < xîx |
| 1850 < xÎx |
| 1851 < xj |
| 1852 < xSx |
| 1853 < xș |
| 1854 = xş |
| 1855 < xȘ |
| 1856 = xŞ |
| 1857 < Xș |
| 1858 = Xş |
| 1859 < XȘ |
| 1860 = XŞ |
| 1861 < xșx |
| 1862 = xşx |
| 1863 < xȘx |
| 1864 = xŞx |
| 1865 < xT |
| 1866 < xTx |
| 1867 < xț |
| 1868 = xţ |
| 1869 < xȚ |
| 1870 = xŢ |
| 1871 < Xț |
| 1872 = Xţ |
| 1873 < XȚ |
| 1874 = XŢ |
| 1875 < xțx |
| 1876 = xţx |
| 1877 < xȚx |
| 1878 = xŢx |
| 1879 < xU |
| 1880 |
| 1881 ** test: DataDrivenCollationTest/testOffsets |
| 1882 # This tests cases where forwards and backwards iteration get different offsets |
| 1883 @ locale en |
| 1884 % strength=tertiary |
| 1885 * compare |
| 1886 < a\uD800\uDC00\uDC00 |
| 1887 < b\uD800\uDC00\uDC00 |
| 1888 * compare |
| 1889 < \u0301A\u0301\u0301 |
| 1890 < \u0301B\u0301\u0301 |
| 1891 * compare |
| 1892 < abcd\r\u0301 |
| 1893 < abce\r\u0301 |
| 1894 # TODO: test offsets in new CollationTest |
| 1895 |
| 1896 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt. |
| 1897 |
| 1898 ** test: was ICU 52 cmsccoll/TestRedundantRules |
| 1899 @ rules |
| 1900 & a < b < c < d& [before 1] c < m |
| 1901 * compare |
| 1902 <1 a |
| 1903 <1 b |
| 1904 <1 m |
| 1905 <1 c |
| 1906 <1 d |
| 1907 |
| 1908 @ rules |
| 1909 & a < b <<< c << d <<< e& [before 3] e <<< x |
| 1910 * compare |
| 1911 <1 a |
| 1912 <1 b |
| 1913 <3 c |
| 1914 <2 d |
| 1915 <3 x |
| 1916 <3 e |
| 1917 |
| 1918 @ rules |
| 1919 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x |
| 1920 * compare |
| 1921 <1 a |
| 1922 <1 b |
| 1923 <3 c |
| 1924 <2 d |
| 1925 <3 e |
| 1926 <3 f |
| 1927 <1 x |
| 1928 <1 g |
| 1929 |
| 1930 @ rules |
| 1931 & a <<< b << c < d& a < m |
| 1932 * compare |
| 1933 <1 a |
| 1934 <3 b |
| 1935 <2 c |
| 1936 <1 m |
| 1937 <1 d |
| 1938 |
| 1939 @ rules |
| 1940 &a<b<<b\u0301 &z<b |
| 1941 * compare |
| 1942 <1 a |
| 1943 <1 b\u0301 |
| 1944 <1 z |
| 1945 <1 b |
| 1946 |
| 1947 @ rules |
| 1948 &z<m<<<q<<<m |
| 1949 * compare |
| 1950 <1 z |
| 1951 <1 q |
| 1952 <3 m |
| 1953 |
| 1954 @ rules |
| 1955 &z<<<m<q<<<m |
| 1956 * compare |
| 1957 <1 z |
| 1958 <1 q |
| 1959 <3 m |
| 1960 |
| 1961 @ rules |
| 1962 & a < b < c < d& r < c |
| 1963 * compare |
| 1964 <1 a |
| 1965 <1 b |
| 1966 <1 d |
| 1967 <1 r |
| 1968 <1 c |
| 1969 |
| 1970 @ rules |
| 1971 & a < b < c < d& c < m |
| 1972 * compare |
| 1973 <1 a |
| 1974 <1 b |
| 1975 <1 c |
| 1976 <1 m |
| 1977 <1 d |
| 1978 |
| 1979 @ rules |
| 1980 & a < b < c < d& a < m |
| 1981 * compare |
| 1982 <1 a |
| 1983 <1 m |
| 1984 <1 b |
| 1985 <1 c |
| 1986 <1 d |
| 1987 |
| 1988 ** test: was ICU 52 cmsccoll/TestExpansionSyntax |
| 1989 # The following two rules should sort the particular list of strings the same. |
| 1990 @ rules |
| 1991 &AE <<< a << b <<< c &d <<< f |
| 1992 * compare |
| 1993 <1 AE |
| 1994 <3 a |
| 1995 <2 b |
| 1996 <3 c |
| 1997 <1 d |
| 1998 <3 f |
| 1999 |
| 2000 @ rules |
| 2001 &A <<< a / E << b / E <<< c /E &d <<< f |
| 2002 * compare |
| 2003 <1 AE |
| 2004 <3 a |
| 2005 <2 b |
| 2006 <3 c |
| 2007 <1 d |
| 2008 <3 f |
| 2009 |
| 2010 # The following two rules should sort the particular list of strings the same. |
| 2011 @ rules |
| 2012 &AE <<< a <<< b << c << d < e < f <<< g |
| 2013 * compare |
| 2014 <1 AE |
| 2015 <3 a |
| 2016 <3 b |
| 2017 <2 c |
| 2018 <2 d |
| 2019 <1 e |
| 2020 <1 f |
| 2021 <3 g |
| 2022 |
| 2023 @ rules |
| 2024 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g |
| 2025 * compare |
| 2026 <1 AE |
| 2027 <3 a |
| 2028 <3 b |
| 2029 <2 c |
| 2030 <2 d |
| 2031 <1 e |
| 2032 <1 f |
| 2033 <3 g |
| 2034 |
| 2035 # The following two rules should sort the particular list of strings the same. |
| 2036 @ rules |
| 2037 &AE <<< B <<< C / D <<< F |
| 2038 * compare |
| 2039 <1 AE |
| 2040 <3 B |
| 2041 <3 F |
| 2042 <1 AED |
| 2043 <3 C |
| 2044 |
| 2045 @ rules |
| 2046 &A <<< B / E <<< C / ED <<< F / E |
| 2047 * compare |
| 2048 <1 AE |
| 2049 <3 B |
| 2050 <3 F |
| 2051 <1 AED |
| 2052 <3 C |
| 2053 |
| 2054 ** test: never reorder trailing primaries |
| 2055 @ root |
| 2056 % reorder Zzzz Grek |
| 2057 * compare |
| 2058 <1 L |
| 2059 <1 字 |
| 2060 <1 Ω |
| 2061 <1 \uFFFD |
| 2062 <1 \uFFFF |
| 2063 |
| 2064 ** test: fall back to mappings with shorter prefixes, not immediately to ones wi
th no prefixes |
| 2065 @ rules |
| 2066 &u=ab|cd |
| 2067 &v=b|ce |
| 2068 * compare |
| 2069 <1 abc |
| 2070 <1 abcc |
| 2071 <1 abcf |
| 2072 <1 abcd |
| 2073 = abu |
| 2074 <1 abce |
| 2075 = abv |
| 2076 |
| 2077 # With the following rules, there is only one prefix per composite ĉ or ç, |
| 2078 # but both prefixes apply to just c in NFD form. |
| 2079 # We would get different results for composed vs. NFD input |
| 2080 # if we fell back directly from longest-prefix mappings to no-prefix mappings. |
| 2081 @ rules |
| 2082 &x=op|ĉ |
| 2083 &y=p|ç |
| 2084 * compare |
| 2085 <1 opc |
| 2086 <2 opć |
| 2087 <1 opcz |
| 2088 <1 opd |
| 2089 <1 opĉ |
| 2090 = opc\u0302 |
| 2091 = opx |
| 2092 <1 opç |
| 2093 = opc\u0327 |
| 2094 = opy |
| 2095 |
| 2096 # The mapping is used which has the longest matching prefix for which |
| 2097 # there is also a suffix match, with the longest suffix match among several for
that prefix. |
| 2098 @ rules |
| 2099 &❶=d |
| 2100 &❷=de |
| 2101 &❸=def |
| 2102 &①=c|d |
| 2103 &②=c|de |
| 2104 &③=c|def |
| 2105 &④=bc|d |
| 2106 &⑤=bc|de |
| 2107 &⑥=bc|def |
| 2108 &⑦=abc|d |
| 2109 &⑧=abc|de |
| 2110 &⑨=abc|def |
| 2111 * compare |
| 2112 <1 9aadzz |
| 2113 = 9aa❶zz |
| 2114 <1 9aadez |
| 2115 = 9aa❷z |
| 2116 <1 9aadef |
| 2117 = 9aa❸ |
| 2118 <1 9acdzz |
| 2119 = 9ac①zz |
| 2120 <1 9acdez |
| 2121 = 9ac②z |
| 2122 <1 9acdef |
| 2123 = 9ac③ |
| 2124 <1 9bcdzz |
| 2125 = 9bc④zz |
| 2126 <1 9bcdez |
| 2127 = 9bc⑤z |
| 2128 <1 9bcdef |
| 2129 = 9bc⑥ |
| 2130 <1 abcdzz |
| 2131 = abc⑦zz |
| 2132 <1 abcdez |
| 2133 = abc⑧z |
| 2134 <1 abcdef |
| 2135 = abc⑨ |
| 2136 |
| 2137 ** test: prefix + discontiguous contraction with missing prefix contraction |
| 2138 # Unfortunate terminology: The first "prefix" here is the pre-context, |
| 2139 # the second "prefix" refers to the contraction/relation string that is |
| 2140 # one shorter than the one being tested. |
| 2141 @ rules |
| 2142 &x=p|e |
| 2143 &y=p|ê |
| 2144 &z=op|ê |
| 2145 # No mapping for op|e: |
| 2146 # Discontiguous contraction matching should not match op|ê in opệ |
| 2147 # because it would have to skip the dot below and extend a match on op|e by the
circumflex, |
| 2148 # but there is no match on op|e. |
| 2149 * compare |
| 2150 <1 oPe |
| 2151 <1 ope |
| 2152 = opx |
| 2153 <1 opệ |
| 2154 = opy\u0323 # y not z |
| 2155 <1 opê |
| 2156 = opz |
| 2157 |
| 2158 # We cannot test for fallback by whether the contraction default CE32 |
| 2159 # is for another contraction. With the following rules, there is no mapping for
op|e, |
| 2160 # and the fallback to prefix p has no contractions. |
| 2161 @ rules |
| 2162 &x=p|e |
| 2163 &z=op|ê |
| 2164 * compare |
| 2165 <1 oPe |
| 2166 <1 ope |
| 2167 = opx |
| 2168 <2 opệ |
| 2169 = opx\u0323\u0302 # x not z |
| 2170 <1 opê |
| 2171 = opz |
| 2172 |
| 2173 # One more variation: Fallback to the simple code point, no shorter non-empty prefix. |
| 2174 @ rules |
| 2175 &x=e |
| 2176 &z=op|ê |
| 2177 * compare |
| 2178 <1 ope |
| 2179 = opx |
| 2180 <3 oPe |
| 2181 = oPx |
| 2182 <2 opệ |
| 2183 = opx\u0323\u0302 # x not z |
| 2184 <1 opê |
| 2185 = opz |
| 2186 |
| 2187 ** test: maxVariable via rules |
| 2188 @ rules |
| 2189 [maxVariable space][alternate shifted] |
| 2190 * compare |
| 2191 = \u0020 |
| 2192 = \u000A |
| 2193 <1 . |
| 2194 <1 ° # degree sign |
| 2195 <1 $ |
| 2196 <1 0 |
| 2197 |
| 2198 ** test: maxVariable via setting |
| 2199 @ root |
| 2200 % maxVariable=currency |
| 2201 % alternate=shifted |
| 2202 * compare |
| 2203 = \u0020 |
| 2204 = \u000A |
| 2205 = . |
| 2206 = ° # degree sign |
| 2207 = $ |
| 2208 <1 0 |
| 2209 |
| 2210 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää) |
| 2211 # This tests canonical closure, but it also tests that CollationFastLatin |
| 2212 # bails out properly for contractions with combining marks. |
| 2213 # For that we need pairs of strings that remain in the Latin fastpath |
| 2214 # long enough, hence the extra "= b" lines. |
| 2215 @ rules |
| 2216 &b=\u00e4\u00e4 |
| 2217 * compare |
| 2218 <1 b |
| 2219 = \u00e4\u00e4 |
| 2220 = b |
| 2221 = a\u0308a\u0308 |
| 2222 = b |
| 2223 = \u00e4a\u0308 |
| 2224 = b |
| 2225 = a\u0308\u00e4 |
| 2226 |
| 2227 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å) |
| 2228 @ rules |
| 2229 &b=\u00C5 |
| 2230 * compare |
| 2231 <1 b |
| 2232 = \u00C5 |
| 2233 = b |
| 2234 = A\u030A |
| 2235 = b |
| 2236 = \u212B |
| 2237 |
| 2238 ** test: reset-before on already-tailored characters, ICU ticket 10108 |
| 2239 @ rules |
| 2240 &a<w<<x &[before 2]x<<y |
| 2241 * compare |
| 2242 <1 a |
| 2243 <1 w |
| 2244 <2 y |
| 2245 <2 x |
| 2246 |
| 2247 @ rules |
| 2248 &a<<w<<<x &[before 2]x<<y |
| 2249 * compare |
| 2250 <1 a |
| 2251 <2 y |
| 2252 <2 w |
| 2253 <3 x |
| 2254 |
| 2255 @ rules |
| 2256 &a<w<x &[before 2]x<<y |
| 2257 * compare |
| 2258 <1 a |
| 2259 <1 w |
| 2260 <1 y |
| 2261 <2 x |
| 2262 |
| 2263 @ rules |
| 2264 &a<w<<<x &[before 2]x<<y |
| 2265 * compare |
| 2266 <1 a |
| 2267 <1 y |
| 2268 <2 w |
| 2269 <3 x |
| 2270 |
| 2271 ** test: numeric collation with other settings, ICU ticket 9092 |
| 2272 @ root |
| 2273 % strength=identical |
| 2274 % caseFirst=upper |
| 2275 % numeric=on |
| 2276 * compare |
| 2277 <1 100\u0020a |
| 2278 <1 101 |
| 2279 |
| 2280 ** test: collation type fallback from unsupported type, ICU ticket 10149 |
| 2281 @ locale fr-CA-u-co-phonebk |
| 2282 # Expect the same result as with fr-CA, using backwards-secondary order. |
| 2283 # That is, we should fall back from the unsupported collation type |
| 2284 # to the locale's default collation type. |
| 2285 * compare |
| 2286 <1 cote |
| 2287 <2 côte |
| 2288 <2 coté |
| 2289 <2 côté |
| 2290 |
| 2291 ** test: @ is equivalent to [backwards 2], ICU ticket 9956 |
| 2292 @ rules |
| 2293 &b<a @ &v<<w |
| 2294 * compare |
| 2295 <1 b |
| 2296 <1 a |
| 2297 <1 cote |
| 2298 <2 côte |
| 2299 <2 coté |
| 2300 <2 côté |
| 2301 <1 v |
| 2302 <2 w |
| 2303 <1 x |
| 2304 |
| 2305 ** test: shifted+reordering, ICU ticket 9507 |
| 2306 @ root |
| 2307 % reorder Grek punct space |
| 2308 % alternate=shifted |
| 2309 % strength=quaternary |
| 2310 # Which primaries are "variable" should be determined without script reordering, |
| 2311 # and then primaries should be reordered whether they are shifted to quaternary
or not. |
| 2312 * compare |
| 2313 <4 ( # punctuation |
| 2314 <4 ) |
| 2315 <4 \u0020 # space |
| 2316 <1 ` # symbol |
| 2317 <1 ^ |
| 2318 <1 $ # currency symbol |
| 2319 <1 € |
| 2320 <1 0 # numbers |
| 2321 <1 ε # Greek |
| 2322 <1 e # Latin |
| 2323 <1 e(e |
| 2324 <4 e)e |
| 2325 <4 e\u0020e |
| 2326 <4 ee |
| 2327 <3 e(E |
| 2328 <4 e)E |
| 2329 <4 e\u0020E |
| 2330 <4 eE |
| 2331 |
| 2332 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 935
1 |
| 2333 @ rules |
| 2334 &\u0001<<<b<<<B |
| 2335 % caseFirst=upper |
| 2336 * compare |
| 2337 <1 aaa |
| 2338 <3 aaaB |
| 2339 |
| 2340 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355 |
| 2341 @ rules |
| 2342 &\u0001<<<b<<<B |
| 2343 % strength=secondary |
| 2344 % caseLevel=on |
| 2345 * compare |
| 2346 <1 a |
| 2347 = ab |
| 2348 = aB |
| 2349 |
| 2350 ** test: custom collation rules involving tail of a contraction in Malayalam, IC
U ticket 6328 |
| 2351 @ rules |
| 2352 &[before 2] ൌ << ൗ # U+0D57 << U+0D4C == 0D46+0D57 |
| 2353 * compare |
| 2354 <1 ൗx |
| 2355 <2 ൌx |
| 2356 <1 ൗy |
| 2357 <2 ൌy |
| 2358 |
| 2359 ** test: quoted apostrophe in compact syntax, ICU ticket 8204 |
| 2360 @ rules |
| 2361 &q<<*a''c |
| 2362 * compare |
| 2363 <1 d |
| 2364 <1 p |
| 2365 <1 q |
| 2366 <2 a |
| 2367 <2 \u0027 |
| 2368 <2 c |
| 2369 <1 r |
| 2370 |
| 2371 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()" |
| 2372 ** test: locale -u- with collation keywords, ICU ticket 8260 |
| 2373 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4 |
| 2374 * compare |
| 2375 <4 \u0020 # space is shifted, strength=quaternary |
| 2376 <1 ! # punctuation is regular |
| 2377 <1 2 |
| 2378 <1 12 # numeric sorting |
| 2379 <1 B |
| 2380 <c b # uppercase first on case level |
| 2381 <1 x\u0301\u0308 |
| 2382 <2 x\u0308\u0301 # normalization off |
| 2383 |
| 2384 ** test: locale @ with collation keywords, ICU ticket 8260 |
| 2385 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shi
fted |
| 2386 * compare |
| 2387 <4 $ # currency symbols are shifted, strength=quaternary |
| 2388 <1 àla |
| 2389 <2 alà # backwards secondary level |
| 2390 |
| 2391 ** test: locale -u- with script reordering, ICU ticket 8260 |
| 2392 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai |
| 2393 * compare |
| 2394 <1 \u0020 |
| 2395 <1 あ |
| 2396 <1 ☂ |
| 2397 <1 Ω |
| 2398 <1 丂 |
| 2399 <1 ж |
| 2400 <1 L |
| 2401 <1 4 |
| 2402 <1 Ձ |
| 2403 <1 अ |
| 2404 <1 ሄ |
| 2405 <1 ฉ |
| 2406 |
| 2407 ** test: locale @collation=type should be case-insensitive |
| 2408 @ locale de@coLLation=PhoneBook |
| 2409 * compare |
| 2410 <1 ae |
| 2411 <2 ä |
| 2412 <3 Ä |
| 2413 |
| 2414 ** test: import root search rules plus German phonebook rules, ICU ticket 8962 |
| 2415 @ locale de-u-co-search |
| 2416 * compare |
| 2417 <1 = |
| 2418 <1 ≠ |
| 2419 <1 a |
| 2420 <1 ae |
| 2421 <2 ä |
| 2422 |
| 2423 # Once more, but with runtime builder. |
| 2424 @ rules |
| 2425 [import und-u-co-search][import de-u-co-phonebk] |
| 2426 * compare |
| 2427 <1 = |
| 2428 <1 ≠ |
| 2429 <1 a |
| 2430 <1 ae |
| 2431 <2 ä |
| 2432 |
| 2433 # Once again, with import from "root" not "und" (as in a proper language tag). |
| 2434 @ rules |
| 2435 [import root-u-co-search][import de-u-co-phonebk] |
| 2436 * compare |
| 2437 <1 = |
| 2438 <1 ≠ |
| 2439 <1 a |
| 2440 <1 ae |
| 2441 <2 ä |
| 2442 |
| 2443 ** test: import rules from a language with non-Latin native script, and reset th
e reordering, ICU ticket 10998 |
| 2444 # Greek should sort Greek first. |
| 2445 @ rules |
| 2446 [import el] |
| 2447 * compare |
| 2448 <1 4 |
| 2449 <1 Ω |
| 2450 <1 L |
| 2451 |
| 2452 # Import Greek, and then reset the reordering. |
| 2453 @ rules |
| 2454 [import el][reorder Zzzz] |
| 2455 * compare |
| 2456 <1 4 |
| 2457 <1 L |
| 2458 <1 Ω |
| 2459 |
| 2460 # "others" is a synonym for Zzzz. |
| 2461 @ rules |
| 2462 [import el][reorder others] |
| 2463 * compare |
| 2464 <1 4 |
| 2465 <1 L |
| 2466 <1 Ω |
OLD | NEW |