| Index: source/test/testdata/collationtest.txt
|
| diff --git a/source/test/testdata/collationtest.txt b/source/test/testdata/collationtest.txt
|
| deleted file mode 100644
|
| index 3a703cb10b6a091cb39eb33b96a4ce7b18d0ae53..0000000000000000000000000000000000000000
|
| --- a/source/test/testdata/collationtest.txt
|
| +++ /dev/null
|
| @@ -1,2540 +0,0 @@
|
| -# Copyright (c) 2012-2015 International Business Machines
|
| -# Corporation and others. All Rights Reserved.
|
| -#
|
| -# This file should be in UTF-8 with a signature byte sequence ("BOM").
|
| -#
|
| -# collationtest.txt: Collation test data.
|
| -#
|
| -# created on: 2012apr13
|
| -# created by: Markus W. Scherer
|
| -
|
| -# A line with "** test: description" is used for verbose and error output.
|
| -
|
| -# A collator can be set with "@ root" or "@ locale language-tag",
|
| -# for example "@ locale de-u-co-phonebk".
|
| -# An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
|
| -
|
| -# A collator can be built with "@ rules".
|
| -# An "@ rules" line is followed by one or more lines with the tailoring rules.
|
| -
|
| -# A collator can be modified with "% attribute=value".
|
| -
|
| -# "* compare" tests the order (= or <) of the following strings.
|
| -# The relation can be "=" or "<" (the level of the difference is not specified)
|
| -# or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference).
|
| -
|
| -# Test sections ("* compare") are terminated by
|
| -# definitions of new collators, changing attributes, or new test sections.
|
| -
|
| -** test: simple CEs & expansions
|
| -# Many types of mappings are tested elsewhere, including via the UCA conformance tests.
|
| -# Here we mostly cover a few unusual mappings.
|
| -@ rules
|
| -&\x01 # most control codes are ignorable
|
| -<<<\u0300 # tertiary CE
|
| -&9<\x00 # NUL not ignorable
|
| -&\uA00A\uA00B=\uA002 # two long-primary CEs
|
| -&\uA00A\uA00B\u00050005=\uA003 # three CEs, require 64 bits
|
| -
|
| -* compare
|
| -= \x01
|
| -= \x02
|
| -<3 \u0300
|
| -<1 9
|
| -<1 \x00
|
| -= \x01\x00\x02
|
| -<1 a
|
| -<3 a\u0300
|
| -<2 a\u0308
|
| -= ä
|
| -<1 b
|
| -<1 か # Hiragana Ka (U+304B)
|
| -<2 か\u3099 # plus voiced sound mark
|
| -= が # Hiragana Ga (U+304C)
|
| -<1 \uA00A\uA00B
|
| -= \uA002
|
| -<1 \uA00A\uA00B\u00050004
|
| -<1 \uA00A\uA00B\u00050005
|
| -= \uA003
|
| -<1 \uA00A\uA00B\u00050006
|
| -
|
| -** test: contractions
|
| -# Create some interesting mappings, and map some normalization-inert characters
|
| -# (which are not subject to canonical reordering)
|
| -# to some of the same CEs to check the sequence of CEs.
|
| -@ rules
|
| -
|
| -# Contractions starting with 'a' should not continue with any character < U+0300
|
| -# so that we can test a shortcut for that.
|
| -&a=ⓐ
|
| -&b<bz=ⓑ
|
| -&d<dz\u0301=ⓓ # d+z+acute
|
| -&z
|
| -<a\u0301=Ⓐ # a+acute sorts after z
|
| -<a\u0301\u0301=Ⓑ # a+acute+acute
|
| -<a\u0301\u0301\u0358=Ⓒ # a+acute+acute+dot above right
|
| -<a\u030a=Ⓓ # a+ring
|
| -<a\u0323=Ⓔ # a+dot below
|
| -<a\u0323\u0358=Ⓕ # a+dot below+dot above right
|
| -<a\u0327\u0323\u030a=Ⓖ # a+cedilla+dot below+ring
|
| -<a\u0327\u0323bz=Ⓗ # a+cedilla+dot below+b+z
|
| -
|
| -&\U0001D158=⁰ # musical notehead black (has a symbol primary)
|
| -<\U0001D158\U0001D165=¼ # musical quarter note
|
| -
|
| -# deliberately missing prefix contractions:
|
| -# dz
|
| -# a\u0327
|
| -# a\u0327\u0323
|
| -# a\u0327\u0323b
|
| -
|
| -&\x01
|
| -<<<\U0001D165=¹ # musical stem (ccc=216)
|
| -<<<\U0001D16D=² # musical augmentation dot (ccc=226)
|
| -<<<\U0001D165\U0001D16D=³ # stem+dot (ccc=216 226)
|
| -&\u0301=❶ # acute (ccc=230)
|
| -&\u030a=❷ # ring (ccc=230)
|
| -&\u0308=❸ # diaeresis (ccc=230)
|
| -<<\u0308\u0301=❹ # diaeresis+acute (=dialytika tonos) (ccc=230 230)
|
| -&\u0327=❺ # cedilla (ccc=202)
|
| -&\u0323=❻ # dot below (ccc=220)
|
| -&\u0331=❼ # macron below (ccc=220)
|
| -<<\u0331\u0358=❽ # macron below+dot above right (ccc=220 232)
|
| -&\u0334=❾ # tilde overlay (ccc=1)
|
| -&\u0358=❿ # dot above right (ccc=232)
|
| -
|
| -&\u0f71=① # tibetan vowel sign aa
|
| -&\u0f72=② # tibetan vowel sign i
|
| -# \u0f71\u0f72 # tibetan vowel sign aa + i = ii = U+0F73
|
| -&\u0f73=③ # tibetan vowel sign ii (ccc=0 but lccc=129)
|
| -
|
| -** test: simple contractions
|
| -
|
| -# Some strings are chosen to cause incremental contiguous contraction matching to
|
| -# go into partial matches for prefixes of contractions
|
| -# (where the prefixes are deliberately not also contractions).
|
| -# When there is no complete match, then the matching code must back out of those
|
| -# so that discontiguous contractions work as specified.
|
| -
|
| -* compare
|
| -# contraction starter with no following text, or mismatch, or blocked
|
| -<1 a
|
| -= ⓐ
|
| -<1 aa
|
| -= ⓐⓐ
|
| -<1 ab
|
| -= ⓐb
|
| -<1 az
|
| -= ⓐz
|
| -
|
| -* compare
|
| -<1 a
|
| -<2 a\u0308\u030a # ring blocked by diaeresis
|
| -= ⓐ❸❷
|
| -<2 a\u0327
|
| -= ⓐ❺
|
| -
|
| -* compare
|
| -<2 \u0308
|
| -= ❸
|
| -<2 \u0308\u030a\u0301 # acute blocked by ring
|
| -= ❸❷❶
|
| -
|
| -* compare
|
| -<1 \U0001D158
|
| -= ⁰
|
| -<1 \U0001D158\U0001D165
|
| -= ¼
|
| -
|
| -# no discontiguous contraction because of missing prefix contraction d+z,
|
| -# and a starter ('z') after the 'd'
|
| -* compare
|
| -<1 dz\u0323\u0301
|
| -= dz❻❶
|
| -
|
| -# contiguous contractions
|
| -* compare
|
| -<1 abz
|
| -= ⓐⓑ
|
| -<1 abzz
|
| -= ⓐⓑz
|
| -
|
| -* compare
|
| -<1 a
|
| -<1 z
|
| -<1 a\u0301
|
| -= Ⓐ
|
| -<1 a\u0301\u0301
|
| -= Ⓑ
|
| -<1 a\u0301\u0301\u0358
|
| -= Ⓒ
|
| -<1 a\u030a
|
| -= Ⓓ
|
| -<1 a\u0323\u0358
|
| -= Ⓕ
|
| -<1 a\u0327\u0323\u030a # match despite missing prefix
|
| -= Ⓖ
|
| -<1 a\u0327\u0323bz
|
| -= Ⓗ
|
| -
|
| -* compare
|
| -<2 \u0308\u0308\u0301 # acute blocked from first diaeresis, contracts with second
|
| -= ❸❹
|
| -
|
| -* compare
|
| -<1 \U0001D158\U0001D165
|
| -= ¼
|
| -
|
| -* compare
|
| -<3 \U0001D165\U0001D16D
|
| -= ³
|
| -
|
| -** test: discontiguous contractions
|
| -* compare
|
| -<1 a\u0327\u030a # a+ring skips cedilla
|
| -= Ⓓ❺
|
| -<2 a\u0327\u0327\u030a # a+ring skips 2 cedillas
|
| -= Ⓓ❺❺
|
| -<2 a\u0327\u0327\u0327\u030a # a+ring skips 3 cedillas
|
| -= Ⓓ❺❺❺
|
| -<2 a\u0334\u0327\u0327\u030a # a+ring skips tilde overlay & 2 cedillas
|
| -= Ⓓ❾❺❺
|
| -<1 a\u0327\u0323 # a+dot below skips cedilla
|
| -= Ⓔ❺
|
| -<1 a\u0323\u0301\u0358 # a+dot below+dot above right: 2-char match, then skips acute
|
| -= Ⓕ❶
|
| -<2 a\u0334\u0323\u0358 # a+dot below skips tilde overlay
|
| -= Ⓕ❾
|
| -
|
| -* compare
|
| -<2 \u0331\u0331\u0358 # macron below+dot above right skips the second macron below
|
| -= ❽❼
|
| -
|
| -* compare
|
| -<1 a\u0327\u0331\u0323\u030a # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
|
| -= Ⓓ❺❼❻
|
| -<1 a\u0327\u0323\U0001D16D\u030a # a+dot below skips cedilla
|
| -= Ⓔ❺²❷
|
| -<2 a\u0327\u0327\u0323\u030a # a+dot below skips 2 cedillas
|
| -= Ⓔ❺❺❷
|
| -<2 a\u0327\u0323\u0323\u030a # a+dot below skips cedilla
|
| -= Ⓔ❺❻❷
|
| -<2 a\u0334\u0327\u0323\u030a # a+dot below skips tilde overlay & cedilla
|
| -= Ⓔ❾❺❷
|
| -
|
| -* compare
|
| -<1 \U0001D158\u0327\U0001D165 # quarter note skips cedilla
|
| -= ¼❺
|
| -<1 a\U0001D165\u0323 # a+dot below skips stem
|
| -= Ⓔ¹
|
| -
|
| -# partial contiguous match, backs up, matches discontiguous contraction
|
| -<1 a\u0327\u0323b
|
| -= Ⓔ❺b
|
| -<1 a\u0327\u0323ba
|
| -= Ⓔ❺bⓐ
|
| -
|
| -# a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
|
| -* compare
|
| -<1 a\u0327\u0301\u0301\u0358
|
| -= Ⓒ❺
|
| -
|
| -# FCD but not NFD
|
| -* compare
|
| -<1 a\u0f73\u0301 # a+acute skips tibetan ii
|
| -= Ⓐ③
|
| -
|
| -# FCD but the 0f71 inside the 0f73 must be skipped
|
| -# to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
|
| -* compare
|
| -<1 \u0f71\u0f73 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
|
| -= ③①
|
| -
|
| -** test: discontiguous contractions with nested contractions
|
| -* compare
|
| -<1 a\u0323\u0308\u0301\u0358
|
| -= Ⓕ❹
|
| -<2 a\u0323\u0308\u0301\u0308\u0301\u0358
|
| -= Ⓕ❹❹
|
| -
|
| -** test: discontiguous contractions with interleaved contractions
|
| -* compare
|
| -# a+ring & cedilla & macron below+dot above right
|
| -<1 a\u0327\u0331\u030a\u0358
|
| -= Ⓓ❺❽
|
| -
|
| -# a+ring & 1x..3x macron below+dot above right
|
| -<2 a\u0331\u030a\u0358
|
| -= Ⓓ❽
|
| -<2 a\u0331\u0331\u030a\u0358\u0358
|
| -= Ⓓ❽❽
|
| -# also skips acute
|
| -<2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
|
| -= Ⓓ❽❽❽❶
|
| -
|
| -# a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
|
| -<1 a\U0001D165\u0323\U0001D16Ddz\u0301
|
| -= Ⓔ³ⓓ
|
| -
|
| -** test: some simple string comparisons
|
| -@ root
|
| -* compare
|
| -# first string compares against ""
|
| -= \u0000
|
| -< a
|
| -<1 b
|
| -<3 B
|
| -= \u0000B\u0000
|
| -
|
| -** test: compare with strength=primary
|
| -% strength=primary
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -= B
|
| -
|
| -** test: compare with strength=secondary
|
| -% strength=secondary
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -= B
|
| -
|
| -** test: compare with strength=tertiary
|
| -% strength=tertiary
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<3 B
|
| -
|
| -** test: compare with strength=quaternary
|
| -% strength=quaternary
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<3 B
|
| -
|
| -** test: compare with strength=identical
|
| -% strength=identical
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<3 B
|
| -
|
| -** test: côté with forwards secondary
|
| -@ root
|
| -* compare
|
| -<1 cote
|
| -<2 coté
|
| -<2 côte
|
| -<2 côté
|
| -
|
| -** test: côté with forwards secondary vs. U+FFFE merge separator
|
| -# Merged sort keys: On each level, any difference in the first segment
|
| -# must trump any further difference.
|
| -* compare
|
| -<1 cote\uFFFEcôté
|
| -<2 coté\uFFFEcôte
|
| -<2 côte\uFFFEcoté
|
| -<2 côté\uFFFEcote
|
| -
|
| -** test: côté with backwards secondary
|
| -% backwards=on
|
| -* compare
|
| -<1 cote
|
| -<2 côte
|
| -<2 coté
|
| -<2 côté
|
| -
|
| -** test: côté with backwards secondary vs. U+FFFE merge separator
|
| -# Merged sort keys: On each level, any difference in the first segment
|
| -# must trump any further difference.
|
| -* compare
|
| -<1 cote\uFFFEcôté
|
| -<2 côte\uFFFEcoté
|
| -<2 coté\uFFFEcôte
|
| -<2 côté\uFFFEcote
|
| -
|
| -** test: U+FFFE on identical level
|
| -@ root
|
| -% strength=identical
|
| -* compare
|
| -# All of these control codes are completely-ignorable, so that
|
| -# their low code points are compared with the merge separator.
|
| -# The merge separator must compare less than any other character.
|
| -<1 \uFFFE\u0001\u0002\u0003
|
| -<i \u0001\uFFFE\u0002\u0003
|
| -<i \u0001\u0002\uFFFE\u0003
|
| -<i \u0001\u0002\u0003\uFFFE
|
| -
|
| -* compare
|
| -# The merge separator must even compare less than U+0000.
|
| -<1 \uFFFE\u0000\u0000
|
| -<i \u0000\uFFFE\u0000
|
| -<i \u0000\u0000\uFFFE
|
| -
|
| -** test: Hani < surrogates < U+FFFD
|
| -# Note: compareUTF8() treats unpaired surrogates like U+FFFD,
|
| -# so with that the strings with surrogates will compare equal to each other
|
| -# and equal to the string with U+FFFD.
|
| -@ root
|
| -% strength=identical
|
| -* compare
|
| -<1 abz
|
| -<1 a\u4e00z
|
| -<1 a\U00020000z
|
| -<1 a\ud800z
|
| -<1 a\udbffz
|
| -<1 a\udc00z
|
| -<1 a\udfffz
|
| -<1 a\ufffdz
|
| -
|
| -** test: script reordering
|
| -@ root
|
| -% reorder Hani Zzzz digit
|
| -* compare
|
| -<1 ?
|
| -<1 +
|
| -<1 丂
|
| -<1 a
|
| -<1 α
|
| -<1 5
|
| -
|
| -% reorder default
|
| -* compare
|
| -<1 ?
|
| -<1 +
|
| -<1 5
|
| -<1 a
|
| -<1 α
|
| -<1 丂
|
| -
|
| -** test: empty rules
|
| -@ rules
|
| -* compare
|
| -<1 a
|
| -<2 ä
|
| -<3 Ä
|
| -<1 b
|
| -
|
| -** test: very simple rules
|
| -@ rules
|
| -&a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
|
| -% strength=quaternary
|
| -* compare
|
| -<1 a
|
| -= e
|
| -<4 q
|
| -<4 r
|
| -<1 x
|
| -<3 X
|
| -<2 y
|
| -<3 Y
|
| -<2 z
|
| -<3 Z
|
| -
|
| -** test: tailoring twice before a root position: primary
|
| -@ rules
|
| -&[before 1]b<p
|
| -&[before 1]b<q
|
| -* compare
|
| -<1 a
|
| -<1 p
|
| -<1 q
|
| -<1 b
|
| -
|
| -** test: tailoring twice before a root position: secondary
|
| -@ rules
|
| -&[before 2]ſ<<p
|
| -&[before 2]ſ<<q
|
| -* compare
|
| -<1 s
|
| -<2 p
|
| -<2 q
|
| -<2 ſ
|
| -
|
| -# secondary-before common weight
|
| -@ rules
|
| -&[before 2]b<<p
|
| -&[before 2]b<<q
|
| -* compare
|
| -<1 a
|
| -<1 p
|
| -<2 q
|
| -<2 b
|
| -
|
| -** test: tailoring twice before a root position: tertiary
|
| -@ rules
|
| -&[before 3]B<<<p
|
| -&[before 3]B<<<q
|
| -* compare
|
| -<1 b
|
| -<3 p
|
| -<3 q
|
| -<3 B
|
| -
|
| -# tertiary-before common weight
|
| -@ rules
|
| -&[before 3]b<<<p
|
| -&[before 3]b<<<q
|
| -* compare
|
| -<1 a
|
| -<1 p
|
| -<3 q
|
| -<3 b
|
| -
|
| -@ rules
|
| -&[before 2]b<<s
|
| -&[before 3]s<<<p
|
| -&[before 3]s<<<q
|
| -* compare
|
| -<1 a
|
| -<1 p
|
| -<3 q
|
| -<3 s
|
| -<2 b
|
| -
|
| -** test: tailor after completely ignorable
|
| -@ rules
|
| -&\x00<<<x<<y
|
| -* compare
|
| -= \x00
|
| -= \x1F
|
| -<3 x
|
| -<2 y
|
| -
|
| -** test: secondary tailoring gaps, ICU ticket 9362
|
| -@ rules
|
| -&[before 2]s<<'_'
|
| -&s<<r # secondary between s and ſ (long s)
|
| -&ſ<<*a-q # more than 15 between ſ and secondary CE boundary
|
| -&[before 2][first primary ignorable]<<u<<v # between secondary CE boundary & lowest secondary CE
|
| -&[last primary ignorable]<<y<<z
|
| -
|
| -* compare
|
| -<2 u
|
| -<2 v
|
| -<2 \u0332 # lowest secondary CE
|
| -<2 \u0308
|
| -<2 y
|
| -<2 z
|
| -<1 s_
|
| -<2 ss
|
| -<2 sr
|
| -<2 sſ
|
| -<2 sa
|
| -<2 sb
|
| -<2 sp
|
| -<2 sq
|
| -<2 sus
|
| -<2 svs
|
| -<2 rs
|
| -
|
| -** test: tertiary tailoring gaps, ICU ticket 9362
|
| -@ rules
|
| -&[before 3]t<<<'_'
|
| -&t<<<r # tertiary between t and fullwidth t
|
| -&ᵀ<<<*a-q # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
|
| -&[before 3][first secondary ignorable]<<<u<<<v # between tertiary CE boundary & lowest tertiary CE
|
| -&[last secondary ignorable]<<<y<<<z
|
| -
|
| -* compare
|
| -<3 u
|
| -<3 v
|
| -# Note: The root collator currently does not map any characters to tertiary CEs.
|
| -<3 y
|
| -<3 z
|
| -<1 t_
|
| -<3 tt
|
| -<3 tr
|
| -<3 tｔ
|
| -<3 tᵀ
|
| -<3 ta
|
| -<3 tb
|
| -<3 tp
|
| -<3 tq
|
| -<3 tut
|
| -<3 tvt
|
| -<3 rt
|
| -
|
| -** test: secondary & tertiary around root character
|
| -@ rules
|
| -&[before 2]m<<r
|
| -&m<<s
|
| -&[before 3]m<<<u
|
| -&m<<<v
|
| -* compare
|
| -<1 l
|
| -<1 r
|
| -<2 u
|
| -<3 m
|
| -<3 v
|
| -<2 s
|
| -<1 n
|
| -
|
| -** test: secondary & tertiary around tailored item
|
| -@ rules
|
| -&m<x
|
| -&[before 2]x<<r
|
| -&x<<s
|
| -&[before 3]x<<<u
|
| -&x<<<v
|
| -* compare
|
| -<1 m
|
| -<1 r
|
| -<2 u
|
| -<3 x
|
| -<3 v
|
| -<2 s
|
| -<1 n
|
| -
|
| -** test: more nesting of secondary & tertiary before
|
| -@ rules
|
| -&[before 3]m<<<u
|
| -&[before 2]m<<r
|
| -&[before 3]r<<<q
|
| -&m<<<w
|
| -&m<<t
|
| -&[before 3]w<<<v
|
| -&w<<<x
|
| -&w<<s
|
| -* compare
|
| -<1 l
|
| -<1 q
|
| -<3 r
|
| -<2 u
|
| -<3 m
|
| -<3 v
|
| -<3 w
|
| -<3 x
|
| -<2 s
|
| -<2 t
|
| -<1 n
|
| -
|
| -** test: case bits
|
| -@ rules
|
| -&w<x # tailored CE getting case bits
|
| - =uv=uV=Uv=UV # 2 chars -> 1 CE
|
| -&ae=ch=cH=Ch=CH # 2 chars -> 2 CEs
|
| -&rst=yz=yZ=Yz=YZ # 2 chars -> 3 CEs
|
| -% caseFirst=lower
|
| -* compare
|
| -<1 ae
|
| -= ch
|
| -<3 cH
|
| -<3 Ch
|
| -<3 CH
|
| -<1 rst
|
| -= yz
|
| -<3 yZ
|
| -<3 Yz
|
| -<3 YZ
|
| -<1 w
|
| -<1 x
|
| -= uv
|
| -<3 uV
|
| -= Uv # mixed case on single CE cannot distinguish variations
|
| -<3 UV
|
| -
|
| -** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
|
| -@ rules
|
| -&\u0001<<<t<<<T # tertiary CEs
|
| -% caseFirst=lower
|
| -* compare
|
| -<1 aa
|
| -<3 aat
|
| -<3 aaT
|
| -<3 aA
|
| -<3 aAt
|
| -<3 ata
|
| -<3 aTa
|
| -
|
| -** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
|
| -% caseFirst=upper
|
| -* compare
|
| -<1 aA
|
| -<3 aAt
|
| -<3 aa
|
| -<3 aat
|
| -<3 aaT
|
| -<3 ata
|
| -<3 aTa
|
| -
|
| -** test: reset on expansion, ICU tickets 9415 & 9593
|
| -@ rules
|
| -&æ<x # tailor the last primary CE so that x sorts between ae and af
|
| -&æb=bæ # copy all reset CEs to make bæ sort the same
|
| -&각<h # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
|
| -&⒀<<y # copy/tailor 4 CEs to make y sort with only a secondary difference
|
| -&l·=z # handle the pre-context for · when fetching reset CEs
|
| - <<u # copy/tailor 2 CEs
|
| -
|
| -* compare
|
| -<1 ae
|
| -<2 æ
|
| -<1 x
|
| -<1 af
|
| -
|
| -* compare
|
| -<1 aeb
|
| -<2 æb
|
| -= bæ
|
| -
|
| -* compare
|
| -<1 각
|
| -<1 h
|
| -<1 갂
|
| -<1 갃
|
| -
|
| -* compare
|
| -<1 · # by itself: primary CE
|
| -<1 l
|
| -<2 l· # l+middle dot has only a secondary difference from l
|
| -= z
|
| -<2 u
|
| -
|
| -* compare
|
| -<1 (13)
|
| -<3 ⒀ # DUCET sets special tertiary weights in all CEs
|
| -<2 y
|
| -<1 (13[
|
| -
|
| -% alternate=shifted
|
| -* compare
|
| -<1 (13)
|
| -= 13
|
| -<3 ⒀
|
| -= y # alternate=shifted removes the tailoring difference on the last CE
|
| -<1 14
|
| -
|
| -** test: contraction inside extension, ICU ticket 9378
|
| -@ rules
|
| -&а<<х/й # all letters are Cyrillic
|
| -* compare
|
| -<1 ай
|
| -<2 х
|
| -
|
| -** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
|
| -@ rules
|
| -&t<x &ᵀ<y # same primary weights
|
| -&q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
|
| -* compare
|
| -<1 q
|
| -<1 u
|
| -<1 v
|
| -<1 ꝗ
|
| -<1 t
|
| -<3 ᵀ
|
| -<1 y
|
| -<1 x
|
| -
|
| -# Principle: Each rule builds on the state of preceding rules and ignores following rules.
|
| -
|
| -** test: later rule does not affect earlier reset position, ICU ticket 10105
|
| -@ rules
|
| -&a < u < v < w &ov < x &b < v
|
| -* compare
|
| -<1 oa
|
| -<1 ou
|
| -<1 x # CE(o) followed by CE between u and w
|
| -<1 ow
|
| -<1 ob
|
| -<1 ov
|
| -
|
| -** test: later rule does not affect earlier extension (1), ICU ticket 10105
|
| -@ rules
|
| -&a=x/b &v=b
|
| -% strength=secondary
|
| -* compare
|
| -<1 B
|
| -<1 c
|
| -<1 v
|
| -= b
|
| -* compare
|
| -<1 AB
|
| -= x
|
| -<1 ac
|
| -<1 av
|
| -= ab
|
| -
|
| -** test: later rule does not affect earlier extension (2), ICU ticket 10105
|
| -@ rules
|
| -&a <<< c / e &g <<< e / l
|
| -% strength=secondary
|
| -* compare
|
| -<1 AE
|
| -= c
|
| -<2 æ
|
| -<1 agl
|
| -= ae
|
| -
|
| -** test: later rule does not affect earlier extension (3), ICU ticket 10105
|
| -@ rules
|
| -&a = b / c &d = c / e
|
| -% strength=secondary
|
| -* compare
|
| -<1 AC # C is still only tertiary different from the original c
|
| -= b
|
| -<1 ade
|
| -= ac
|
| -
|
| -** test: extension contains tailored character, ICU ticket 10105
|
| -@ rules
|
| -&a=e &b=u/e
|
| -* compare
|
| -<1 a
|
| -= e
|
| -<1 ba
|
| -= be
|
| -= u
|
| -
|
| -** test: add simple mappings for characters with root context
|
| -@ rules
|
| -&z=· # middle dot has a prefix mapping in the CLDR root
|
| -&n=и # и (U+0438) has contractions in the root
|
| -* compare
|
| -<1 l
|
| -<2 l· # root mapping for l|· still works
|
| -<1 z
|
| -= ·
|
| -* compare
|
| -<1 n
|
| -= и
|
| -<1 И
|
| -<1 и\u0306 # root mapping for й=и\u0306 still works
|
| -= й
|
| -<3 Й
|
| -
|
| -** test: add context mappings around characters with root context
|
| -@ rules
|
| -&z=·h # middle dot has a prefix mapping in the CLDR root
|
| -&n=ә|и # и (U+0438) has contractions in the root
|
| -* compare
|
| -<1 l
|
| -<2 l· # root mapping for l|· still works
|
| -<1 z
|
| -= ·h
|
| -* compare
|
| -<1 и
|
| -<3 И
|
| -<1 и\u0306 # root mapping for й=и\u0306 still works
|
| -= й
|
| -* compare
|
| -<1 әn
|
| -= әи
|
| -<1 әo
|
| -
|
| -** test: many secondary CEs at the top of their range
|
| -@ rules
|
| -&[last primary ignorable]<<*\u2801-\u28ff
|
| -* compare
|
| -<2 \u0308
|
| -<2 \u2801
|
| -<2 \u2802
|
| -<2 \u2803
|
| -<2 \u2804
|
| -<2 \u28fd
|
| -<2 \u28fe
|
| -<2 \u28ff
|
| -<1 \x20
|
| -
|
| -** test: many tertiary CEs at the top of their range
|
| -@ rules
|
| -&[last secondary ignorable]<<<*a-z
|
| -* compare
|
| -<3 a
|
| -<3 b
|
| -<3 c
|
| -<3 d
|
| -# e..w
|
| -<3 x
|
| -<3 y
|
| -<3 z
|
| -<2 \u0308
|
| -
|
| -** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
|
| -@ rules
|
| -&a=p|x &b=px &c=op
|
| -* compare
|
| -<1 b
|
| -= px
|
| -<3 B
|
| -<1 c
|
| -= op
|
| -<3 C
|
| -* compare
|
| -<1 ca
|
| -= opx # first contraction op, then prefix p|x
|
| -<3 cA
|
| -<3 Ca
|
| -
|
| -** test: reset position with prefix (pre-context), ICU ticket 10102
|
| -@ rules
|
| -&a=p|x &px=y
|
| -* compare
|
| -<1 pa
|
| -= px
|
| -= y
|
| -<3 pA
|
| -<1 q
|
| -<1 x
|
| -
|
| -** test: prefix+contraction together (1), ICU ticket 10071
|
| -@ rules
|
| -&x=a|bc
|
| -* compare
|
| -<1 ab
|
| -<1 Abc
|
| -<1 abd
|
| -<1 ac
|
| -<1 aw
|
| -<1 ax
|
| -= abc
|
| -<3 aX
|
| -<3 Ax
|
| -<1 b
|
| -<1 bb
|
| -<1 bc
|
| -<3 bC
|
| -<3 Bc
|
| -<1 bd
|
| -
|
| -** test: prefix+contraction together (2), ICU ticket 10071
|
| -@ rules
|
| -&w=bc &x=a|b
|
| -* compare
|
| -<1 w
|
| -= bc
|
| -<3 W
|
| -* compare
|
| -<1 aw
|
| -<1 ax
|
| -= ab
|
| -<3 aX
|
| -<1 axb
|
| -<1 axc
|
| -= abc # prefix match a|b takes precedence over contraction match bc
|
| -<3 abC
|
| -<1 abd
|
| -<1 ay
|
| -
|
| -** test: prefix+contraction together (3), ICU ticket 10071
|
| -@ rules
|
| -&x=a|b &w=bc # reverse order of rules as previous test, order should not matter here
|
| -* compare # same "compare" sequences as previous test
|
| -<1 w
|
| -= bc
|
| -<3 W
|
| -* compare
|
| -<1 aw
|
| -<1 ax
|
| -= ab
|
| -<3 aX
|
| -<1 axb
|
| -<1 axc
|
| -= abc # prefix match a|b takes precedence over contraction match bc
|
| -<3 abC
|
| -<1 abd
|
| -<1 ay
|
| -
|
| -** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
|
| -@ rules
|
| -&d=ch &v=p|ci
|
| -* compare
|
| -<1 pc
|
| -<3 pC
|
| -<1 pcH
|
| -<1 pcI
|
| -<1 pd
|
| -= pch # no-prefix contraction ch matches
|
| -<3 pD
|
| -<1 pv
|
| -= pci # prefix+contraction p|ci matches
|
| -<3 pV
|
| -
|
| -** test: tailor in & around compact ranges of root primaries
|
| -# The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
|
| -# which should be reliably encoded as one range in the root elements data.
|
| -@ rules
|
| -&[before 1]ᚁ<a
|
| -&ᚁ<b
|
| -&[before 1]ᚂ<c
|
| -&ᚂ<d
|
| -&[before 1]ᚚ<y
|
| -&ᚚ<z
|
| -&[before 2]ᚁ<<r
|
| -&ᚁ<<s
|
| -&[before 3]ᚚ<<<t
|
| -&ᚚ<<<u
|
| -* compare
|
| -<1 ᣵ # U+18F5 last Canadian Aboriginal
|
| -<1 a
|
| -<1 r
|
| -<2 ᚁ
|
| -<2 s
|
| -<1 b
|
| -<1 c
|
| -<1 ᚂ
|
| -<1 d
|
| -<1 ᚃ
|
| -<1 ᚙ
|
| -<1 y
|
| -<1 t
|
| -<3 ᚚ
|
| -<3 u
|
| -<1 z
|
| -<1 ᚠ # U+16A0 first Runic
|
| -
|
| -** test: suppressContractions
|
| -@ rules
|
| -&z<ch<әж [suppressContractions [·cә]]
|
| -* compare
|
| -<1 ch
|
| -<3 cH # ch was suppressed
|
| -<1 l
|
| -<1 l· # primary difference, not secondary, because l|· was suppressed
|
| -<1 ә
|
| -<2 ә\u0308 # secondary difference, not primary, because contractions for ә were suppressed
|
| -<1 әж
|
| -<3 әЖ
|
| -
|
| -** test: Hangul & Jamo
|
| -@ rules
|
| -&L=\u1100 # first Jamo L
|
| -&V=\u1161 # first Jamo V
|
| -&T=\u11A8 # first Jamo T
|
| -&\uAC01<<*\u4E00-\u4EFF # first Hangul LVT syllable & lots of secondary diffs
|
| -* compare
|
| -<1 Lv
|
| -<3 LV
|
| -= \u1100\u1161
|
| -= \uAC00
|
| -<1 LVt
|
| -<3 LVT
|
| -= \u1100\u1161\u11A8
|
| -= \uAC00\u11A8
|
| -= \uAC01
|
| -<2 LVT\u0308
|
| -<2 \u4E00
|
| -<2 \u4E01
|
| -<2 \u4E80
|
| -<2 \u4EFF
|
| -<2 LV\u0308T
|
| -<1 \uAC02
|
| -
|
| -** test: adjust special reset positions according to previous rules, CLDR ticket 6070
|
| -@ rules
|
| -&[last variable]<x
|
| -[maxVariable space] # has effect only after building, no effect on following rules
|
| -&[last variable]<y
|
| -&[before 1][first regular]<z
|
| -* compare
|
| -<1 ? # some punctuation
|
| -<1 x
|
| -<1 y
|
| -<1 z
|
| -<1 $ # some symbol
|
| -
|
| -@ rules
|
| -&[last primary ignorable]<<x<<<y
|
| -&[last primary ignorable]<<z
|
| -* compare
|
| -<2 \u0358
|
| -<2 x
|
| -<3 y
|
| -<2 z
|
| -<1 \x20
|
| -
|
| -@ rules
|
| -&[last secondary ignorable]<<<x
|
| -&[last secondary ignorable]<<<y
|
| -* compare
|
| -<3 x
|
| -<3 y
|
| -<2 \u0358
|
| -
|
| -@ rules
|
| -&[before 2][first variable]<<z
|
| -&[before 2][first variable]<<y
|
| -&[before 3][first variable]<<<x
|
| -&[before 3][first variable]<<<w
|
| -&[before 1][first variable]<v
|
| -&[before 2][first variable]<<u
|
| -&[before 3][first variable]<<<t
|
| -&[before 2]\uFDD1\xA0<<s # FractionalUCA.txt: FDD1 00A0, SPACE first primary
|
| -* compare
|
| -<2 \u0358
|
| -<1 s
|
| -<2 \uFDD1\xA0
|
| -<1 t
|
| -<3 u
|
| -<2 v
|
| -<1 w
|
| -<3 x
|
| -<3 y
|
| -<2 z
|
| -<2 \t
|
| -
|
| -@ rules
|
| -&[before 2][first regular]<<z
|
| -&[before 3][first regular]<<<y
|
| -&[before 1][first regular]<x
|
| -&[before 3][first regular]<<<w
|
| -&[before 2]\uFDD1\u263A<<v # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
|
| -&[before 3][first regular]<<<u
|
| -&[before 1][first regular]<p # primary before the boundary: becomes variable
|
| -&[before 3][first regular]<<<t # not affected by p
|
| -&[last variable]<q # after p!
|
| -* compare
|
| -<1 ?
|
| -<1 p
|
| -<1 q
|
| -<1 t
|
| -<3 u
|
| -<3 v
|
| -<1 w
|
| -<3 x
|
| -<1 y
|
| -<3 z
|
| -<1 $
|
| -
|
| -# check that p & q are indeed variable
|
| -% alternate=shifted
|
| -* compare
|
| -= ?
|
| -= p
|
| -= q
|
| -<1 t
|
| -<3 u
|
| -<3 v
|
| -<1 w
|
| -<3 x
|
| -<1 y
|
| -<3 z
|
| -<1 $
|
| -
|
| -@ rules
|
| -&[before 2][first trailing]<<z
|
| -&[before 1][first trailing]<y
|
| -&[before 3][first trailing]<<<x
|
| -* compare
|
| -<1 \u4E00 # first Han, first implicit
|
| -<1 \uFDD1\uFDD0 # FractionalUCA.txt: unassigned first primary
|
| -# Note: The root collator currently does not map any characters to the trailing first boundary primary.
|
| -<1 x
|
| -<3 y
|
| -<1 z
|
| -<2 \uFFFD # The root collator currently maps U+FFFD to the first real trailing primary.
|
| -
|
| -@ rules
|
| -&[before 2][first primary ignorable]<<z
|
| -&[before 2][first primary ignorable]<<y
|
| -&[before 3][first primary ignorable]<<<x
|
| -&[before 3][first primary ignorable]<<<w
|
| -* compare
|
| -= \x01
|
| -<2 w
|
| -<3 x
|
| -<3 y
|
| -<2 z
|
| -<2 \u0301
|
| -
|
| -@ rules
|
| -&[before 3][first secondary ignorable]<<<y
|
| -&[before 3][first secondary ignorable]<<<x
|
| -* compare
|
| -= \x01
|
| -<3 x
|
| -<3 y
|
| -<2 \u0301
|
| -
|
| -** test: canonical closure
|
| -@ rules
|
| -&X=A &U=Â
|
| -* compare
|
| -<1 U
|
| -= Â
|
| -= A\u0302
|
| -<2 Ú # U with acute
|
| -= U\u0301
|
| -= Ấ # A with circumflex & acute
|
| -= Â\u0301
|
| -= A\u0302\u0301
|
| -<1 X
|
| -= A
|
| -<2 X\u030A # with ring above
|
| -= Å
|
| -= A\u030A
|
| -= \u212B # Angstrom sign
|
| -
|
| -@ rules
|
| -&x=\u5140\u55C0
|
| -* compare
|
| -<1 x
|
| -= \u5140\u55C0
|
| -= \u5140\uFA0D
|
| -= \uFA0C\u55C0
|
| -= \uFA0C\uFA0D # CJK compatibility characters
|
| -<3 X
|
| -
|
| -# canonical closure on prefix rules, ICU ticket 9444
|
| -@ rules
|
| -&x=ä|ŝ
|
| -* compare
|
| -<1 äs # not tailored
|
| -<1 äx
|
| -= äŝ
|
| -= a\u0308s\u0302
|
| -= a\u0308ŝ
|
| -= äs\u0302
|
| -<3 äX
|
| -
|
| -** test: conjoining Jamo map to expansions
|
| -@ rules
|
| -&gg=\u1101 # Jamo Lead consonant GG
|
| -&nj=\u11AC # Jamo Trail consonant NJ
|
| -* compare
|
| -<1 gg\u1161nj
|
| -= \u1101\u1161\u11AC
|
| -= \uAE4C\u11AC
|
| -= \uAE51
|
| -<3 gg\u1161nJ
|
| -<1 \u1100\u1100
|
| -
|
| -** test: canonical tail closure, ICU ticket 5913
|
| -@ rules
|
| -&a<â
|
| -* compare
|
| -<1 a
|
| -<1 â # tailored
|
| -= a\u0302
|
| -<2 a\u0323\u0302 # discontiguous contraction
|
| -= ạ\u0302 # equivalent
|
| -= ậ # equivalent
|
| -<1 b
|
| -
|
| -@ rules
|
| -&a<ạ
|
| -* compare
|
| -<1 a
|
| -<1 ạ # tailored
|
| -= a\u0323
|
| -<2 a\u0323\u0302 # contiguous contraction plus extra diacritic
|
| -= ạ\u0302 # equivalent
|
| -= ậ # equivalent
|
| -<1 b
|
| -
|
| -# Tail closure should work even if there is a prefix and/or contraction.
|
| -@ rules
|
| -&a<\u5140|câ
|
| -# In order to find discontiguous contractions for \u5140|câ
|
| -# there must exist a mapping for \u5140|ca, regardless of what it maps to.
|
| -# (This follows from the UCA spec.)
|
| -&x=\u5140|ca
|
| -* compare
|
| -<1 \u5140a
|
| -= \uFA0Ca
|
| -<1 \u5140câ # tailored
|
| -= \uFA0Ccâ
|
| -= \u5140ca\u0302
|
| -= \uFA0Cca\u0302
|
| -<2 \u5140ca\u0323\u0302 # discontiguous contraction
|
| -= \uFA0Cca\u0323\u0302
|
| -= \u5140cạ\u0302
|
| -= \uFA0Ccạ\u0302
|
| -= \u5140cậ
|
| -= \uFA0Ccậ
|
| -<1 \u5140b
|
| -= \uFA0Cb
|
| -<1 \u5140x
|
| -= \u5140ca
|
| -
|
| -# Double-check that without the extra mapping there will be no discontiguous match.
|
| -@ rules
|
| -&a<\u5140|câ
|
| -* compare
|
| -<1 \u5140a
|
| -= \uFA0Ca
|
| -<1 \u5140câ # tailored
|
| -= \uFA0Ccâ
|
| -= \u5140ca\u0302
|
| -= \uFA0Cca\u0302
|
| -<1 \u5140b
|
| -= \uFA0Cb
|
| -<1 \u5140ca\u0323\u0302 # no discontiguous contraction
|
| -= \uFA0Cca\u0323\u0302
|
| -= \u5140cạ\u0302
|
| -= \uFA0Ccạ\u0302
|
| -= \u5140cậ
|
| -= \uFA0Ccậ
|
| -
|
| -@ rules
|
| -&a<cạ
|
| -* compare
|
| -<1 a
|
| -<1 cạ # tailored
|
| -= ca\u0323
|
| -<2 ca\u0323\u0302 # contiguous contraction plus extra diacritic
|
| -= cạ\u0302 # equivalent
|
| -= cậ # equivalent
|
| -<1 b
|
| -
|
| -# ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
| -# = 03C9 0313 0300 0345
|
| -# ccc = 0, 230, 230, 240
|
| -@ rules
|
| -&δ=αῳ
|
| -# In order to find discontiguous contractions for αῳ
|
| -# there must exist a mapping for αω, regardless of what it maps to.
|
| -# (This follows from the UCA spec.)
|
| -&ε=αω
|
| -* compare
|
| -<1 δ
|
| -= αῳ
|
| -= αω\u0345
|
| -<2 αω\u0313\u0300\u0345 # discontiguous contraction
|
| -= αὠ\u0300\u0345
|
| -= αὢ\u0345
|
| -= αᾢ
|
| -<2 αω\u0300\u0313\u0345
|
| -= αὼ\u0313\u0345
|
| -= αῲ\u0313 # not FCD
|
| -<1 ε
|
| -= αω
|
| -
|
| -# Double-check that without the extra mapping there will be no discontiguous match.
|
| -@ rules
|
| -&δ=αῳ
|
| -* compare
|
| -<1 αω\u0313\u0300\u0345 # no discontiguous contraction
|
| -= αὠ\u0300\u0345
|
| -= αὢ\u0345
|
| -= αᾢ
|
| -<2 αω\u0300\u0313\u0345
|
| -= αὼ\u0313\u0345
|
| -= αῲ\u0313 # not FCD
|
| -<1 δ
|
| -= αῳ
|
| -= αω\u0345
|
| -
|
| -# Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
|
| -# Tests code paths where the tailored string has a combining mark
|
| -# that does not occur in any composite's decomposition.
|
| -@ rules
|
| -&δ=αὼ\u0315
|
| -* compare
|
| -<1 αω\u0313\u0300\u0315 # Not tailored: The grave accent blocks the comma above.
|
| -= αὠ\u0300\u0315
|
| -= αὢ\u0315
|
| -<1 δ
|
| -= αὼ\u0315
|
| -= αω\u0300\u0315
|
| -<2 αω\u0300\u0315\u0345
|
| -= αὼ\u0315\u0345
|
| -= αῲ\u0315 # not FCD
|
| -
|
| -** test: danish a+a vs. a-umlaut, ICU ticket 9319
|
| -@ rules
|
| -&z<aa
|
| -* compare
|
| -<1 z
|
| -<1 aa
|
| -<2 aa\u0308
|
| -= aä
|
| -
|
| -** test: Jamo L with and in prefix
|
| -# Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
|
| -@ rules
|
| -# Jamo Lead consonant G after G or GG
|
| -&[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
|
| -# Jamo Lead consonant GG sorts like G+G
|
| -&\u1100\u1100=\u1101
|
| -# Note: Making G|GG and GG|GG sort the same as G|G+G
|
| -# would require the ability to reset on G|G+G,
|
| -# or we could make G-after-G equal to some secondary-CE character,
|
| -# and reset on a pair of those.
|
| -# (It does not matter much if there are at most two G in a row in real text.)
|
| -* compare
|
| -<1 \u1100
|
| -<2 \u1100\u1100 # only one primary from a sequence of G lead consonants
|
| -= \u1101
|
| -<2 \u1100\u1100\u1100
|
| -= \u1101\u1100
|
| -# but not = \u1100\u1101, see above
|
| -<1 \u1100\u1161
|
| -= \uAC00
|
| -<2 \u1100\u1100\u1161
|
| -= \u1100\uAC00 # prefix match from the L of the LV syllable
|
| -= \u1101\u1161
|
| -= \uAE4C
|
| -
|
| -** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
|
| -@ rules
|
| -# Low secondary CEs for Jamo V & T.
|
| -# Note: T should sort before V for proper syllable order.
|
| -&\u0332 # COMBINING LOW LINE (first primary ignorable)
|
| -<<\u1161<<\u1162
|
| -
|
| -# Korean Jamo lead consonant search rules, part 2:
|
| -# Make modern compound L jamo primary equivalent to non-compound forms.
|
| -
|
| -# Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
|
| -&\u0313 # COMBINING COMMA ABOVE (second primary ignorable)
|
| -=\u1100|\u1100
|
| -=\u1103|\u1103
|
| -=\u1107|\u1107
|
| -=\u1109|\u1109
|
| -=\u110C|\u110C
|
| -
|
| -# Compound L Jamo map to equivalent expansions of primary+secondary CE.
|
| -&\u1100\u0313=\u1101<<<\u3132 # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
|
| -&\u1103\u0313=\u1104<<<\u3138 # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
|
| -&\u1107\u0313=\u1108<<<\u3143 # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
|
| -&\u1109\u0313=\u110A<<<\u3146 # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
|
| -&\u110C\u0313=\u110D<<<\u3149 # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
|
| -
|
| -* compare
|
| -<1 \u1100\u1161
|
| -= \uAC00
|
| -<2 \u1100\u1162
|
| -= \uAC1C
|
| -<2 \u1100\u1100\u1161
|
| -= \u1100\uAC00
|
| -= \u1101\u1161
|
| -= \uAE4C
|
| -<3 \u3132\u1161
|
| -
|
| -** test: Hangul syllables in prefix & in the interior of a contraction
|
| -@ rules
|
| -&x=\u1100\u1161|a\u1102\u1162z
|
| -* compare
|
| -<1 \u1100\u1161x
|
| -= \u1100\u1161a\u1102\u1162z
|
| -= \u1100\u1161a\uB0B4z
|
| -= \uAC00a\u1102\u1162z
|
| -= \uAC00a\uB0B4z
|
| -
|
| -** test: digits are unsafe-backwards when numeric=on
|
| -@ root
|
| -% numeric=on
|
| -* compare
|
| -# If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
|
| -# We need to back up before the identical prefix "1" and compare the full numbers.
|
| -<1 11b
|
| -<1 101a
|
| -
|
| -** test: simple locale data test
|
| -@ locale de
|
| -* compare
|
| -<1 a
|
| -<2 ä
|
| -<1 ae
|
| -<2 æ
|
| -
|
| -@ locale de-u-co-phonebk
|
| -* compare
|
| -<1 a
|
| -<1 ae
|
| -<2 ä
|
| -<2 æ
|
| -
|
| -# The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
|
| -
|
| -** test: DataDrivenCollationTest/TestMorePinyin
|
| -# Testing the primary strength.
|
| -@ locale zh
|
| -% strength=primary
|
| -* compare
|
| -< lā
|
| -= lĀ
|
| -= Lā
|
| -= LĀ
|
| -< lān
|
| -= lĀn
|
| -< lē
|
| -= lĒ
|
| -= Lē
|
| -= LĒ
|
| -< lēn
|
| -= lĒn
|
| -
|
| -** test: DataDrivenCollationTest/TestLithuanian
|
| -# Lithuanian sort order.
|
| -@ locale lt
|
| -* compare
|
| -< cz
|
| -< č
|
| -< d
|
| -< iz
|
| -< j
|
| -< sz
|
| -< š
|
| -< t
|
| -< zz
|
| -< ž
|
| -
|
| -** test: DataDrivenCollationTest/TestLatvian
|
| -# Latvian sort order.
|
| -@ locale lv
|
| -* compare
|
| -< cz
|
| -< č
|
| -< d
|
| -< gz
|
| -< ģ
|
| -< h
|
| -< iz
|
| -< j
|
| -< kz
|
| -< ķ
|
| -< l
|
| -< lz
|
| -< ļ
|
| -< m
|
| -< nz
|
| -< ņ
|
| -< o
|
| -< rz
|
| -< ŗ
|
| -< s
|
| -< sz
|
| -< š
|
| -< t
|
| -< zz
|
| -< ž
|
| -
|
| -** test: DataDrivenCollationTest/TestEstonian
|
| -# Estonian sort order.
|
| -@ locale et
|
| -* compare
|
| -< sy
|
| -< š
|
| -< šy
|
| -< z
|
| -< zy
|
| -< ž
|
| -< v
|
| -< va
|
| -< w
|
| -< õ
|
| -< õy
|
| -< ä
|
| -< äy
|
| -< ö
|
| -< öy
|
| -< ü
|
| -< üy
|
| -< x
|
| -
|
| -** test: DataDrivenCollationTest/TestAlbanian
|
| -# Albanian sort order.
|
| -@ locale sq
|
| -* compare
|
| -< cz
|
| -< ç
|
| -< d
|
| -< dz
|
| -< dh
|
| -< e
|
| -< ez
|
| -< ë
|
| -< f
|
| -< gz
|
| -< gj
|
| -< h
|
| -< lz
|
| -< ll
|
| -< m
|
| -< nz
|
| -< nj
|
| -< o
|
| -< rz
|
| -< rr
|
| -< s
|
| -< sz
|
| -< sh
|
| -< t
|
| -< tz
|
| -< th
|
| -< u
|
| -< xz
|
| -< xh
|
| -< y
|
| -< zz
|
| -< zh
|
| -
|
| -** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
|
| -# Sorted file has different order.
|
| -@ root
|
| -# normalization=on turned on & off automatically.
|
| -* compare
|
| -< \u5F20
|
| -< \u5F20\u4E00\u8E3F
|
| -
|
| -** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
|
| -# This pretty much crashes.
|
| -@ root
|
| -* compare
|
| -< \u0f71\u0f72\u0f80\u0f71\u0f72
|
| -< \u0f80
|
| -
|
| -** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
|
| -# These are examples of strings that caused trouble in partial sort key testing.
|
| -@ locale th-TH
|
| -* compare
|
| -< \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
|
| -< \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
|
| -* compare
|
| -< \u0E01\u0E07\u0E01\u0E32\u0E23
|
| -< \u0E01\u0E07\u0E42\u0E01\u0E49
|
| -* compare
|
| -< \u0E01\u0E23\u0E19\u0E17\u0E32
|
| -< \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
|
| -* compare
|
| -< \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
|
| -< \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
|
| -* compare
|
| -< \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
|
| -< \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
|
| -
|
| -** test: DataDrivenCollationTest/TestJavaStyleRule
|
| -# java.text allows rules to start as '<<<x<<<y...'
|
| -# we emulate this by assuming a &[first tertiary ignorable] in this case.
|
| -@ rules
|
| -&\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
|
| -* compare
|
| -= a
|
| -= equal
|
| -< z
|
| -< x
|
| -= b # x had become the new first primary ignorable
|
| -< w
|
| -
|
| -** test: DataDrivenCollationTest/TestShiftedIgnorable
|
| -# The UCA states that primary ignorables should be completely
|
| -# ignorable when following a shifted code point.
|
| -@ root
|
| -% alternate=shifted
|
| -% strength=quaternary
|
| -* compare
|
| -< a\u0020b
|
| -= a\u0020\u0300b
|
| -= a\u0020\u0301b
|
| -< a_b
|
| -= a_\u0300b
|
| -= a_\u0301b
|
| -< A\u0020b
|
| -= A\u0020\u0300b
|
| -= A\u0020\u0301b
|
| -< A_b
|
| -= A_\u0300b
|
| -= A_\u0301b
|
| -< a\u0301b
|
| -< A\u0301b
|
| -< a\u0300b
|
| -< A\u0300b
|
| -
|
| -** test: DataDrivenCollationTest/TestNShiftedIgnorable
|
| -# The UCA states that primary ignorables should be completely
|
| -# ignorable when following a shifted code point.
|
| -@ root
|
| -% alternate=non-ignorable
|
| -% strength=tertiary
|
| -* compare
|
| -< a\u0020b
|
| -< A\u0020b
|
| -< a\u0020\u0301b
|
| -< A\u0020\u0301b
|
| -< a\u0020\u0300b
|
| -< A\u0020\u0300b
|
| -< a_b
|
| -< A_b
|
| -< a_\u0301b
|
| -< A_\u0301b
|
| -< a_\u0300b
|
| -< A_\u0300b
|
| -< a\u0301b
|
| -< A\u0301b
|
| -< a\u0300b
|
| -< A\u0300b
|
| -
|
| -** test: DataDrivenCollationTest/TestSafeSurrogates
|
| -# It turned out that surrogates were not skipped properly
|
| -# when iterating backwards if they were in the middle of a
|
| -# contraction. This test assures that this is fixed.
|
| -@ rules
|
| -&a < x\ud800\udc00b
|
| -* compare
|
| -< a
|
| -< x\ud800\udc00b
|
| -
|
| -** test: DataDrivenCollationTest/da_TestPrimary
|
| -# This test goes through primary strength cases
|
| -@ locale da
|
| -% strength=primary
|
| -* compare
|
| -< Lvi
|
| -< Lwi
|
| -* compare
|
| -< L\u00e4vi
|
| -< L\u00f6wi
|
| -* compare
|
| -< L\u00fcbeck
|
| -= Lybeck
|
| -
|
| -** test: DataDrivenCollationTest/da_TestTertiary
|
| -# This test goes through tertiary strength cases
|
| -@ locale da
|
| -% strength=tertiary
|
| -* compare
|
| -< Luc
|
| -< luck
|
| -* compare
|
| -< luck
|
| -< L\u00fcbeck
|
| -* compare
|
| -< lybeck
|
| -< L\u00fcbeck
|
| -* compare
|
| -< L\u00e4vi
|
| -< L\u00f6we
|
| -* compare
|
| -< L\u00f6ww
|
| -< mast
|
| -
|
| -* compare
|
| -< A/S
|
| -< ANDRE
|
| -< ANDR\u00c9
|
| -< ANDREAS
|
| -< AS
|
| -< CA
|
| -< \u00c7A
|
| -< CB
|
| -< \u00c7C
|
| -< D.S.B.
|
| -< DA
|
| -< \u00d0A
|
| -< DB
|
| -< \u00d0C
|
| -< DSB
|
| -< DSC
|
| -< EKSTRA_ARBEJDE
|
| -< EKSTRABUD0
|
| -< H\u00d8ST
|
| -< HAAG
|
| -< H\u00c5NDBOG
|
| -< HAANDV\u00c6RKSBANKEN
|
| -< Karl
|
| -< karl
|
| -< NIELS\u0020J\u00d8RGEN
|
| -< NIELS-J\u00d8RGEN
|
| -< NIELSEN
|
| -< R\u00c9E,\u0020A
|
| -< REE,\u0020B
|
| -< R\u00c9E,\u0020L
|
| -< REE,\u0020V
|
| -< SCHYTT,\u0020B
|
| -< SCHYTT,\u0020H
|
| -< SCH\u00dcTT,\u0020H
|
| -< SCHYTT,\u0020L
|
| -< SCH\u00dcTT,\u0020M
|
| -< SS
|
| -< \u00df
|
| -< SSA
|
| -< STORE\u0020VILDMOSE
|
| -< STOREK\u00c6R0
|
| -< STORM\u0020PETERSEN
|
| -< STORMLY
|
| -< THORVALD
|
| -< THORVARDUR
|
| -< \u00feORVAR\u00d0UR
|
| -< THYGESEN
|
| -< VESTERG\u00c5RD,\u0020A
|
| -< VESTERGAARD,\u0020A
|
| -< VESTERG\u00c5RD,\u0020B
|
| -< \u00c6BLE
|
| -< \u00c4BLE
|
| -< \u00d8BERG
|
| -< \u00d6BERG
|
| -
|
| -* compare
|
| -< andere
|
| -< chaque
|
| -< chemin
|
| -< cote
|
| -< cot\u00e9
|
| -< c\u00f4te
|
| -< c\u00f4t\u00e9
|
| -< \u010du\u010d\u0113t
|
| -< Czech
|
| -< hi\u0161a
|
| -< irdisch
|
| -< lie
|
| -< lire
|
| -< llama
|
| -< l\u00f5ug
|
| -< l\u00f2za
|
| -< lu\u010d
|
| -< luck
|
| -< L\u00fcbeck
|
| -< lye
|
| -< l\u00e4vi
|
| -< L\u00f6wen
|
| -< m\u00e0\u0161ta
|
| -< m\u00eer
|
| -< myndig
|
| -< M\u00e4nner
|
| -< m\u00f6chten
|
| -< pi\u00f1a
|
| -< pint
|
| -< pylon
|
| -< \u0161\u00e0ran
|
| -< savoir
|
| -< \u0160erb\u016bra
|
| -< Sietla
|
| -< \u015blub
|
| -< subtle
|
| -< symbol
|
| -< s\u00e4mtlich
|
| -< verkehrt
|
| -< vox
|
| -< v\u00e4ga
|
| -< waffle
|
| -< wood
|
| -< yen
|
| -< yuan
|
| -< yucca
|
| -< \u017eal
|
| -< \u017eena
|
| -< \u017den\u0113va
|
| -< zoo0
|
| -< Zviedrija
|
| -< Z\u00fcrich
|
| -< zysk0
|
| -< \u00e4ndere
|
| -
|
| -** test: DataDrivenCollationTest/hi_TestNewRules
|
| -# This test goes through new rules and tests against old rules
|
| -@ locale hi
|
| -* compare
|
| -< कॐ
|
| -< कं
|
| -< कँ
|
| -< कः
|
| -
|
| -** test: DataDrivenCollationTest/ro_TestNewRules
|
| -# This test goes through new rules and tests against old rules
|
| -@ locale ro
|
| -* compare
|
| -< xAx
|
| -< xă
|
| -< xĂ
|
| -< Xă
|
| -< XĂ
|
| -< xăx
|
| -< xĂx
|
| -< xâ
|
| -< xÂ
|
| -< Xâ
|
| -< XÂ
|
| -< xâx
|
| -< xÂx
|
| -< xb
|
| -< xIx
|
| -< xî
|
| -< xÎ
|
| -< Xî
|
| -< XÎ
|
| -< xîx
|
| -< xÎx
|
| -< xj
|
| -< xSx
|
| -< xș
|
| -= xş
|
| -< xȘ
|
| -= xŞ
|
| -< Xș
|
| -= Xş
|
| -< XȘ
|
| -= XŞ
|
| -< xșx
|
| -= xşx
|
| -< xȘx
|
| -= xŞx
|
| -< xT
|
| -< xTx
|
| -< xț
|
| -= xţ
|
| -< xȚ
|
| -= xŢ
|
| -< Xț
|
| -= Xţ
|
| -< XȚ
|
| -= XŢ
|
| -< xțx
|
| -= xţx
|
| -< xȚx
|
| -= xŢx
|
| -< xU
|
| -
|
| -** test: DataDrivenCollationTest/testOffsets
|
| -# This tests cases where forwards and backwards iteration get different offsets
|
| -@ locale en
|
| -% strength=tertiary
|
| -* compare
|
| -< a\uD800\uDC00\uDC00
|
| -< b\uD800\uDC00\uDC00
|
| -* compare
|
| -< \u0301A\u0301\u0301
|
| -< \u0301B\u0301\u0301
|
| -* compare
|
| -< abcd\r\u0301
|
| -< abce\r\u0301
|
| -# TODO: test offsets in new CollationTest
|
| -
|
| -# End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
|
| -
|
| -** test: was ICU 52 cmsccoll/TestRedundantRules
|
| -@ rules
|
| -& a < b < c < d& [before 1] c < m
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<1 m
|
| -<1 c
|
| -<1 d
|
| -
|
| -@ rules
|
| -& a < b <<< c << d <<< e& [before 3] e <<< x
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<3 c
|
| -<2 d
|
| -<3 x
|
| -<3 e
|
| -
|
| -@ rules
|
| -& a < b <<< c << d <<< e <<< f < g& [before 1] g < x
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<3 c
|
| -<2 d
|
| -<3 e
|
| -<3 f
|
| -<1 x
|
| -<1 g
|
| -
|
| -@ rules
|
| -& a <<< b << c < d& a < m
|
| -* compare
|
| -<1 a
|
| -<3 b
|
| -<2 c
|
| -<1 m
|
| -<1 d
|
| -
|
| -@ rules
|
| -&a<b<<b\u0301 &z<b
|
| -* compare
|
| -<1 a
|
| -<1 b\u0301
|
| -<1 z
|
| -<1 b
|
| -
|
| -@ rules
|
| -&z<m<<<q<<<m
|
| -* compare
|
| -<1 z
|
| -<1 q
|
| -<3 m
|
| -
|
| -@ rules
|
| -&z<<<m<q<<<m
|
| -* compare
|
| -<1 z
|
| -<1 q
|
| -<3 m
|
| -
|
| -@ rules
|
| -& a < b < c < d& r < c
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<1 d
|
| -<1 r
|
| -<1 c
|
| -
|
| -@ rules
|
| -& a < b < c < d& c < m
|
| -* compare
|
| -<1 a
|
| -<1 b
|
| -<1 c
|
| -<1 m
|
| -<1 d
|
| -
|
| -@ rules
|
| -& a < b < c < d& a < m
|
| -* compare
|
| -<1 a
|
| -<1 m
|
| -<1 b
|
| -<1 c
|
| -<1 d
|
| -
|
| -** test: was ICU 52 cmsccoll/TestExpansionSyntax
|
| -# The following two rules should sort the particular list of strings the same.
|
| -@ rules
|
| -&AE <<< a << b <<< c &d <<< f
|
| -* compare
|
| -<1 AE
|
| -<3 a
|
| -<2 b
|
| -<3 c
|
| -<1 d
|
| -<3 f
|
| -
|
| -@ rules
|
| -&A <<< a / E << b / E <<< c /E &d <<< f
|
| -* compare
|
| -<1 AE
|
| -<3 a
|
| -<2 b
|
| -<3 c
|
| -<1 d
|
| -<3 f
|
| -
|
| -# The following two rules should sort the particular list of strings the same.
|
| -@ rules
|
| -&AE <<< a <<< b << c << d < e < f <<< g
|
| -* compare
|
| -<1 AE
|
| -<3 a
|
| -<3 b
|
| -<2 c
|
| -<2 d
|
| -<1 e
|
| -<1 f
|
| -<3 g
|
| -
|
| -@ rules
|
| -&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
|
| -* compare
|
| -<1 AE
|
| -<3 a
|
| -<3 b
|
| -<2 c
|
| -<2 d
|
| -<1 e
|
| -<1 f
|
| -<3 g
|
| -
|
| -# The following two rules should sort the particular list of strings the same.
|
| -@ rules
|
| -&AE <<< B <<< C / D <<< F
|
| -* compare
|
| -<1 AE
|
| -<3 B
|
| -<3 F
|
| -<1 AED
|
| -<3 C
|
| -
|
| -@ rules
|
| -&A <<< B / E <<< C / ED <<< F / E
|
| -* compare
|
| -<1 AE
|
| -<3 B
|
| -<3 F
|
| -<1 AED
|
| -<3 C
|
| -
|
| -** test: never reorder trailing primaries
|
| -@ root
|
| -% reorder Zzzz Grek
|
| -* compare
|
| -<1 L
|
| -<1 字
|
| -<1 Ω
|
| -<1 \uFFFD
|
| -<1 \uFFFF
|
| -
|
| -** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
|
| -@ rules
|
| -&u=ab|cd
|
| -&v=b|ce
|
| -* compare
|
| -<1 abc
|
| -<1 abcc
|
| -<1 abcf
|
| -<1 abcd
|
| -= abu
|
| -<1 abce
|
| -= abv
|
| -
|
| -# With the following rules, there is only one prefix per composite ĉ or ç,
|
| -# but both prefixes apply to just c in NFD form.
|
| -# We would get different results for composed vs. NFD input
|
| -# if we fell back directly from longest-prefix mappings to no-prefix mappings.
|
| -@ rules
|
| -&x=op|ĉ
|
| -&y=p|ç
|
| -* compare
|
| -<1 opc
|
| -<2 opć
|
| -<1 opcz
|
| -<1 opd
|
| -<1 opĉ
|
| -= opc\u0302
|
| -= opx
|
| -<1 opç
|
| -= opc\u0327
|
| -= opy
|
| -
|
| -# The mapping is used which has the longest matching prefix for which
|
| -# there is also a suffix match, with the longest suffix match among several for that prefix.
|
| -@ rules
|
| -&❶=d
|
| -&❷=de
|
| -&❸=def
|
| -&①=c|d
|
| -&②=c|de
|
| -&③=c|def
|
| -&④=bc|d
|
| -&⑤=bc|de
|
| -&⑥=bc|def
|
| -&⑦=abc|d
|
| -&⑧=abc|de
|
| -&⑨=abc|def
|
| -* compare
|
| -<1 9aadzz
|
| -= 9aa❶zz
|
| -<1 9aadez
|
| -= 9aa❷z
|
| -<1 9aadef
|
| -= 9aa❸
|
| -<1 9acdzz
|
| -= 9ac①zz
|
| -<1 9acdez
|
| -= 9ac②z
|
| -<1 9acdef
|
| -= 9ac③
|
| -<1 9bcdzz
|
| -= 9bc④zz
|
| -<1 9bcdez
|
| -= 9bc⑤z
|
| -<1 9bcdef
|
| -= 9bc⑥
|
| -<1 abcdzz
|
| -= abc⑦zz
|
| -<1 abcdez
|
| -= abc⑧z
|
| -<1 abcdef
|
| -= abc⑨
|
| -
|
| -** test: prefix + discontiguous contraction with missing prefix contraction
|
| -# Unfortunate terminology: The first "prefix" here is the pre-context,
|
| -# the second "prefix" refers to the contraction/relation string that is
|
| -# one shorter than the one being tested.
|
| -@ rules
|
| -&x=p|e
|
| -&y=p|ê
|
| -&z=op|ê
|
| -# No mapping for op|e:
|
| -# Discontiguous contraction matching should not match op|ê in opệ
|
| -# because it would have to skip the dot below and extend a match on op|e by the circumflex,
|
| -# but there is no match on op|e.
|
| -* compare
|
| -<1 oPe
|
| -<1 ope
|
| -= opx
|
| -<1 opệ
|
| -= opy\u0323 # y not z
|
| -<1 opê
|
| -= opz
|
| -
|
| -# We cannot test for fallback by whether the contraction default CE32
|
| -# is for another contraction. With the following rules, there is no mapping for op|e,
|
| -# and the fallback to prefix p has no contractions.
|
| -@ rules
|
| -&x=p|e
|
| -&z=op|ê
|
| -* compare
|
| -<1 oPe
|
| -<1 ope
|
| -= opx
|
| -<2 opệ
|
| -= opx\u0323\u0302 # x not z
|
| -<1 opê
|
| -= opz
|
| -
|
| -# One more variation: Fallback to the simple code point, no shorter non-empty prefix.
|
| -@ rules
|
| -&x=e
|
| -&z=op|ê
|
| -* compare
|
| -<1 ope
|
| -= opx
|
| -<3 oPe
|
| -= oPx
|
| -<2 opệ
|
| -= opx\u0323\u0302 # x not z
|
| -<1 opê
|
| -= opz
|
| -
|
| -** test: maxVariable via rules
|
| -@ rules
|
| -[maxVariable space][alternate shifted]
|
| -* compare
|
| -= \u0020
|
| -= \u000A
|
| -<1 .
|
| -<1 ° # degree sign
|
| -<1 $
|
| -<1 0
|
| -
|
| -** test: maxVariable via setting
|
| -@ root
|
| -% maxVariable=currency
|
| -% alternate=shifted
|
| -* compare
|
| -= \u0020
|
| -= \u000A
|
| -= .
|
| -= ° # degree sign
|
| -= $
|
| -<1 0
|
| -
|
| -** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
|
| -# This tests canonical closure, but it also tests that CollationFastLatin
|
| -# bails out properly for contractions with combining marks.
|
| -# For that we need pairs of strings that remain in the Latin fastpath
|
| -# long enough, hence the extra "= b" lines.
|
| -@ rules
|
| -&b=\u00e4\u00e4
|
| -* compare
|
| -<1 b
|
| -= \u00e4\u00e4
|
| -= b
|
| -= a\u0308a\u0308
|
| -= b
|
| -= \u00e4a\u0308
|
| -= b
|
| -= a\u0308\u00e4
|
| -
|
| -** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
|
| -@ rules
|
| -&b=\u00C5
|
| -* compare
|
| -<1 b
|
| -= \u00C5
|
| -= b
|
| -= A\u030A
|
| -= b
|
| -= \u212B
|
| -
|
| -** test: reset-before on already-tailored characters, ICU ticket 10108
|
| -@ rules
|
| -&a<w<<x &[before 2]x<<y
|
| -* compare
|
| -<1 a
|
| -<1 w
|
| -<2 y
|
| -<2 x
|
| -
|
| -@ rules
|
| -&a<<w<<<x &[before 2]x<<y
|
| -* compare
|
| -<1 a
|
| -<2 y
|
| -<2 w
|
| -<3 x
|
| -
|
| -@ rules
|
| -&a<w<x &[before 2]x<<y
|
| -* compare
|
| -<1 a
|
| -<1 w
|
| -<1 y
|
| -<2 x
|
| -
|
| -@ rules
|
| -&a<w<<<x &[before 2]x<<y
|
| -* compare
|
| -<1 a
|
| -<1 y
|
| -<2 w
|
| -<3 x
|
| -
|
| -** test: numeric collation with other settings, ICU ticket 9092
|
| -@ root
|
| -% strength=identical
|
| -% caseFirst=upper
|
| -% numeric=on
|
| -* compare
|
| -<1 100\u0020a
|
| -<1 101
|
| -
|
| -** test: collation type fallback from unsupported type, ICU ticket 10149
|
| -@ locale fr-CA-u-co-phonebk
|
| -# Expect the same result as with fr-CA, using backwards-secondary order.
|
| -# That is, we should fall back from the unsupported collation type
|
| -# to the locale's default collation type.
|
| -* compare
|
| -<1 cote
|
| -<2 côte
|
| -<2 coté
|
| -<2 côté
|
| -
|
| -** test: @ is equivalent to [backwards 2], ICU ticket 9956
|
| -@ rules
|
| -&b<a @ &v<<w
|
| -* compare
|
| -<1 b
|
| -<1 a
|
| -<1 cote
|
| -<2 côte
|
| -<2 coté
|
| -<2 côté
|
| -<1 v
|
| -<2 w
|
| -<1 x
|
| -
|
| -** test: shifted+reordering, ICU ticket 9507
|
| -@ root
|
| -% reorder Grek punct space
|
| -% alternate=shifted
|
| -% strength=quaternary
|
| -# Which primaries are "variable" should be determined without script reordering,
|
| -# and then primaries should be reordered whether they are shifted to quaternary or not.
|
| -* compare
|
| -<4 ( # punctuation
|
| -<4 )
|
| -<4 \u0020 # space
|
| -<1 ` # symbol
|
| -<1 ^
|
| -<1 $ # currency symbol
|
| -<1 €
|
| -<1 0 # numbers
|
| -<1 ε # Greek
|
| -<1 e # Latin
|
| -<1 e(e
|
| -<4 e)e
|
| -<4 e\u0020e
|
| -<4 ee
|
| -<3 e(E
|
| -<4 e)E
|
| -<4 e\u0020E
|
| -<4 eE
|
| -
|
| -** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
|
| -@ rules
|
| -&\u0001<<<b<<<B
|
| -% caseFirst=upper
|
| -* compare
|
| -<1 aaa
|
| -<3 aaaB
|
| -
|
| -** test: secondary+case ignores secondary ignorables, ICU ticket 9355
|
| -@ rules
|
| -&\u0001<<<b<<<B
|
| -% strength=secondary
|
| -% caseLevel=on
|
| -* compare
|
| -<1 a
|
| -= ab
|
| -= aB
|
| -
|
| -** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
|
| -@ rules
|
| -&[before 2] ൌ << ൗ # U+0D57 << U+0D4C == 0D46+0D57
|
| -* compare
|
| -<1 ൗx
|
| -<2 ൌx
|
| -<1 ൗy
|
| -<2 ൌy
|
| -
|
| -** test: quoted apostrophe in compact syntax, ICU ticket 8204
|
| -@ rules
|
| -&q<<*a''c
|
| -* compare
|
| -<1 d
|
| -<1 p
|
| -<1 q
|
| -<2 a
|
| -<2 \u0027
|
| -<2 c
|
| -<1 r
|
| -
|
| -# ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
|
| -** test: locale -u- with collation keywords, ICU ticket 8260
|
| -@ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
|
| -* compare
|
| -<4 \u0020 # space is shifted, strength=quaternary
|
| -<1 ! # punctuation is regular
|
| -<1 2
|
| -<1 12 # numeric sorting
|
| -<1 B
|
| -<c b # uppercase first on case level
|
| -<1 x\u0301\u0308
|
| -<2 x\u0308\u0301 # normalization off
|
| -
|
| -** test: locale @ with collation keywords, ICU ticket 8260
|
| -@ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
|
| -* compare
|
| -<4 $ # currency symbols are shifted, strength=quaternary
|
| -<1 àla
|
| -<2 alà # backwards secondary level
|
| -
|
| -** test: locale -u- with script reordering, ICU ticket 8260
|
| -@ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
|
| -* compare
|
| -<1 \u0020
|
| -<1 あ
|
| -<1 ☂
|
| -<1 Ω
|
| -<1 丂
|
| -<1 ж
|
| -<1 L
|
| -<1 4
|
| -<1 Ձ
|
| -<1 अ
|
| -<1 ሄ
|
| -<1 ฉ
|
| -
|
| -** test: locale @collation=type should be case-insensitive
|
| -@ locale de@coLLation=PhoneBook
|
| -* compare
|
| -<1 ae
|
| -<2 ä
|
| -<3 Ä
|
| -
|
| -** test: import root search rules plus German phonebook rules, ICU ticket 8962
|
| -@ locale de-u-co-search
|
| -* compare
|
| -<1 =
|
| -<1 ≠
|
| -<1 a
|
| -<1 ae
|
| -<2 ä
|
| -
|
| -# Once more, but with runtime builder.
|
| -@ rules
|
| -[import und-u-co-search][import de-u-co-phonebk]
|
| -* compare
|
| -<1 =
|
| -<1 ≠
|
| -<1 a
|
| -<1 ae
|
| -<2 ä
|
| -
|
| -# Once again, with import from "root" not "und" (as in a proper language tag).
|
| -@ rules
|
| -[import root-u-co-search][import de-u-co-phonebk]
|
| -* compare
|
| -<1 =
|
| -<1 ≠
|
| -<1 a
|
| -<1 ae
|
| -<2 ä
|
| -
|
| -** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
|
| -# Greek should sort Greek first.
|
| -@ rules
|
| -[import el]
|
| -* compare
|
| -<1 4
|
| -<1 Ω
|
| -<1 L
|
| -
|
| -# Import Greek, and then reset the reordering.
|
| -@ rules
|
| -[import el][reorder Zzzz]
|
| -* compare
|
| -<1 4
|
| -<1 L
|
| -<1 Ω
|
| -
|
| -# "others" is a synonym for Zzzz.
|
| -@ rules
|
| -[import el][reorder others]
|
| -* compare
|
| -<1 4
|
| -<1 L
|
| -<1 Ω
|
| -
|
| -** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
|
| -@ rules
|
| -&x<<aa<<<Aa<<<AA
|
| -% strength=secondary
|
| -* compare
|
| -<1 AA
|
| -<2 Aẩ
|
| -<2 aą
|
| -* compare
|
| -<1 AA
|
| -<2 aą
|
| -
|
| -** test: tailor tertiary-after a common tertiary where there is a lower one
|
| -# Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
|
| -# See ICU ticket 11448 & CLDR ticket 7222.
|
| -@ rules
|
| -&あ<<<x<<<y<<<z
|
| -* compare
|
| -<1 ぁ
|
| -<3 あ
|
| -<3 x
|
| -<3 y
|
| -<3 z
|
| -<3 ァ
|
| -<1 い
|
| -
|
| -** test: tailor tertiary-after a below-common tertiary
|
| -@ rules
|
| -&ぁ<<<x<<<y<<<z
|
| -* compare
|
| -<1 ぁ
|
| -<3 x
|
| -<3 y
|
| -<3 z
|
| -<3 あ
|
| -<3 ァ
|
| -<1 い
|
| -
|
| -** test: tailor tertiary-before a common tertiary where there is a lower one
|
| -@ rules
|
| -&[before 3]あ<<<x<<<y<<<z
|
| -* compare
|
| -<1 ぁ
|
| -<3 x
|
| -<3 y
|
| -<3 z
|
| -<3 あ
|
| -<3 ァ
|
| -<1 い
|
| -
|
| -** test: tailor tertiary-before a below-common tertiary
|
| -@ rules
|
| -&[before 3]ぁ<<<x<<<y<<<z
|
| -* compare
|
| -<1 x
|
| -<3 y
|
| -<3 z
|
| -<3 ぁ
|
| -<3 あ
|
| -<3 ァ
|
| -<1 い
|
| -
|
| -** test: reorder single scripts not groups, ICU ticket 11449
|
| -@ root
|
| -% reorder Goth Latn
|
| -* compare
|
| -<1 4
|
| -<1 𐌰 # Gothic
|
| -<1 L
|
| -<1 Ω
|
| -# Before ICU 55, the following reordered together with Gothic.
|
| -<1 𐌈 # Old Italic
|
| -<1 𐑐 # Shavian
|
|
|