| Index: source/test/testdata/collationtest.txt
|
| diff --git a/source/test/testdata/collationtest.txt b/source/test/testdata/collationtest.txt
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d55f53188892f67d62c19083cef0963589c5806a
|
| --- /dev/null
|
| +++ b/source/test/testdata/collationtest.txt
|
| @@ -0,0 +1,2466 @@
|
| +# Copyright (c) 2012-2014 International Business Machines
|
| +# Corporation and others. All Rights Reserved.
|
| +#
|
| +# This file should be in UTF-8 with a signature byte sequence ("BOM").
|
| +#
|
| +# collationtest.txt: Collation test data.
|
| +#
|
| +# created on: 2012apr13
|
| +# created by: Markus W. Scherer
|
| +
|
| +# A line with "** test: description" is used for verbose and error output.
|
| +
|
| +# A collator can be set with "@ root" or "@ locale language-tag",
|
| +# for example "@ locale de-u-co-phonebk".
|
| +# An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
|
| +
|
| +# A collator can be built with "@ rules".
|
| +# An "@ rules" line is followed by one or more lines with the tailoring rules.
|
| +
|
| +# A collator can be modified with "% attribute=value" or with "% reorder codes" (for example "% reorder Hani Zzzz digit" or "% reorder default").
|
| +
|
| +# "* compare" tests the order (= or <) of the following strings.
|
| +# The relation can be "=" or "<" (the level of the difference is not specified)
|
| +# or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference; "<c" is the case level, "<i" the identical level).
|
| +
|
| +# Test sections ("* compare") are terminated by
|
| +# definitions of new collators, changing attributes, or new test sections.
|
| +
|
| +** test: simple CEs & expansions
|
| +# Many types of mappings are tested elsewhere, including via the UCA conformance tests.
|
| +# Here we mostly cover a few unusual mappings.
|
| +@ rules
|
| +&\x01 # most control codes are ignorable
|
| +<<<\u0300 # tertiary CE
|
| +&9<\x00 # NUL not ignorable
|
| +&\uA00A\uA00B=\uA002 # two long-primary CEs
|
| +&\uA00A\uA00B\u00050005=\uA003 # three CEs, require 64 bits
|
| +
|
| +* compare
|
| += \x01
|
| += \x02
|
| +<3 \u0300
|
| +<1 9
|
| +<1 \x00
|
| += \x01\x00\x02
|
| +<1 a
|
| +<3 a\u0300
|
| +<2 a\u0308
|
| += ä
|
| +<1 b
|
| +<1 か # Hiragana Ka (U+304B)
|
| +<2 か\u3099 # plus voiced sound mark
|
| += が # Hiragana Ga (U+304C)
|
| +<1 \uA00A\uA00B
|
| += \uA002
|
| +<1 \uA00A\uA00B\u00050004
|
| +<1 \uA00A\uA00B\u00050005
|
| += \uA003
|
| +<1 \uA00A\uA00B\u00050006
|
| +
|
| +** test: contractions
|
| +# Create some interesting mappings, and map some normalization-inert characters
|
| +# (which are not subject to canonical reordering)
|
| +# to some of the same CEs to check the sequence of CEs.
|
| +@ rules
|
| +
|
| +# Contractions starting with 'a' should not continue with any character < U+0300
|
| +# so that we can test a shortcut for that.
|
| +&a=ⓐ
|
| +&b<bz=ⓑ
|
| +&d<dz\u0301=ⓓ # d+z+acute
|
| +&z
|
| +<a\u0301=Ⓐ # a+acute sorts after z
|
| +<a\u0301\u0301=Ⓑ # a+acute+acute
|
| +<a\u0301\u0301\u0358=Ⓒ # a+acute+acute+dot above right
|
| +<a\u030a=Ⓓ # a+ring
|
| +<a\u0323=Ⓔ # a+dot below
|
| +<a\u0323\u0358=Ⓕ # a+dot below+dot above right
|
| +<a\u0327\u0323\u030a=Ⓖ # a+cedilla+dot below+ring
|
| +<a\u0327\u0323bz=Ⓗ # a+cedilla+dot below+b+z
|
| +
|
| +&\U0001D158=⁰ # musical notehead black (has a symbol primary)
|
| +<\U0001D158\U0001D165=¼ # musical quarter note
|
| +
|
| +# deliberately missing prefix contractions:
|
| +# dz
|
| +# a\u0327
|
| +# a\u0327\u0323
|
| +# a\u0327\u0323b
|
| +
|
| +&\x01
|
| +<<<\U0001D165=¹ # musical stem (ccc=216)
|
| +<<<\U0001D16D=² # musical augmentation dot (ccc=226)
|
| +<<<\U0001D165\U0001D16D=³ # stem+dot (ccc=216 226)
|
| +&\u0301=❶ # acute (ccc=230)
|
| +&\u030a=❷ # ring (ccc=230)
|
| +&\u0308=❸ # diaeresis (ccc=230)
|
| +<<\u0308\u0301=❹ # diaeresis+acute (=dialytika tonos) (ccc=230 230)
|
| +&\u0327=❺ # cedilla (ccc=202)
|
| +&\u0323=❻ # dot below (ccc=220)
|
| +&\u0331=❼ # macron below (ccc=220)
|
| +<<\u0331\u0358=❽ # macron below+dot above right (ccc=220 232)
|
| +&\u0334=❾ # tilde overlay (ccc=1)
|
| +&\u0358=❿ # dot above right (ccc=232)
|
| +
|
| +&\u0f71=① # tibetan vowel sign aa
|
| +&\u0f72=② # tibetan vowel sign i
|
| +# \u0f71\u0f72 # tibetan vowel sign aa + i = ii = U+0F73
|
| +&\u0f73=③ # tibetan vowel sign ii (ccc=0 but lccc=129)
|
| +
|
| +** test: simple contractions
|
| +
|
| +# Some strings are chosen to cause incremental contiguous contraction matching to
|
| +# go into partial matches for prefixes of contractions
|
| +# (where the prefixes are deliberately not also contractions).
|
| +# When there is no complete match, then the matching code must back out of those
|
| +# so that discontiguous contractions work as specified.
|
| +
|
| +* compare
|
| +# contraction starter with no following text, or mismatch, or blocked
|
| +<1 a
|
| += ⓐ
|
| +<1 aa
|
| += ⓐⓐ
|
| +<1 ab
|
| += ⓐb
|
| +<1 az
|
| += ⓐz
|
| +
|
| +* compare
|
| +<1 a
|
| +<2 a\u0308\u030a # ring blocked by diaeresis
|
| += ⓐ❸❷
|
| +<2 a\u0327
|
| += ⓐ❺
|
| +
|
| +* compare
|
| +<2 \u0308
|
| += ❸
|
| +<2 \u0308\u030a\u0301 # acute blocked by ring
|
| += ❸❷❶
|
| +
|
| +* compare
|
| +<1 \U0001D158
|
| += ⁰
|
| +<1 \U0001D158\U0001D165
|
| += ¼
|
| +
|
| +# no discontiguous contraction because of missing prefix contraction d+z,
|
| +# and a starter ('z') after the 'd'
|
| +* compare
|
| +<1 dz\u0323\u0301
|
| += dz❻❶
|
| +
|
| +# contiguous contractions
|
| +* compare
|
| +<1 abz
|
| += ⓐⓑ
|
| +<1 abzz
|
| += ⓐⓑz
|
| +
|
| +* compare
|
| +<1 a
|
| +<1 z
|
| +<1 a\u0301
|
| += Ⓐ
|
| +<1 a\u0301\u0301
|
| += Ⓑ
|
| +<1 a\u0301\u0301\u0358
|
| += Ⓒ
|
| +<1 a\u030a
|
| += Ⓓ
|
| +<1 a\u0323\u0358
|
| += Ⓕ
|
| +<1 a\u0327\u0323\u030a # match despite missing prefix
|
| += Ⓖ
|
| +<1 a\u0327\u0323bz
|
| += Ⓗ
|
| +
|
| +* compare
|
| +<2 \u0308\u0308\u0301 # acute blocked from first diaeresis, contracts with second
|
| += ❸❹
|
| +
|
| +* compare
|
| +<1 \U0001D158\U0001D165
|
| += ¼
|
| +
|
| +* compare
|
| +<3 \U0001D165\U0001D16D
|
| += ³
|
| +
|
| +** test: discontiguous contractions
|
| +* compare
|
| +<1 a\u0327\u030a # a+ring skips cedilla
|
| += Ⓓ❺
|
| +<2 a\u0327\u0327\u030a # a+ring skips 2 cedillas
|
| += Ⓓ❺❺
|
| +<2 a\u0327\u0327\u0327\u030a # a+ring skips 3 cedillas
|
| += Ⓓ❺❺❺
|
| +<2 a\u0334\u0327\u0327\u030a # a+ring skips tilde overlay & 2 cedillas
|
| += Ⓓ❾❺❺
|
| +<1 a\u0327\u0323 # a+dot below skips cedilla
|
| += Ⓔ❺
|
| +<1 a\u0323\u0301\u0358 # a+dot below+dot ab.r.: 2-char match, then skips acute
|
| += Ⓕ❶
|
| +<2 a\u0334\u0323\u0358 # a+dot below skips tilde overlay
|
| += Ⓕ❾
|
| +
|
| +* compare
|
| +<2 \u0331\u0331\u0358 # macron below+dot ab.r. skips the second macron below
|
| += ❽❼
|
| +
|
| +* compare
|
| +<1 a\u0327\u0331\u0323\u030a # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
|
| += Ⓓ❺❼❻
|
| +<1 a\u0327\u0323\U0001D16D\u030a # a+dot below skips cedilla
|
| += Ⓔ❺²❷
|
| +<2 a\u0327\u0327\u0323\u030a # a+dot below skips 2 cedillas
|
| += Ⓔ❺❺❷
|
| +<2 a\u0327\u0323\u0323\u030a # a+dot below skips cedilla
|
| += Ⓔ❺❻❷
|
| +<2 a\u0334\u0327\u0323\u030a # a+dot below skips tilde overlay & cedilla
|
| += Ⓔ❾❺❷
|
| +
|
| +* compare
|
| +<1 \U0001D158\u0327\U0001D165 # quarter note skips cedilla
|
| += ¼❺
|
| +<1 a\U0001D165\u0323 # a+dot below skips stem
|
| += Ⓔ¹
|
| +
|
| +# partial contiguous match, backs up, matches discontiguous contraction
|
| +<1 a\u0327\u0323b
|
| += Ⓔ❺b
|
| +<1 a\u0327\u0323ba
|
| += Ⓔ❺bⓐ
|
| +
|
| +# a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
|
| +* compare
|
| +<1 a\u0327\u0301\u0301\u0358
|
| += Ⓒ❺
|
| +
|
| +# FCD but not NFD
|
| +* compare
|
| +<1 a\u0f73\u0301 # a+acute skips tibetan ii
|
| += Ⓐ③
|
| +
|
| +# FCD but the 0f71 inside the 0f73 must be skipped
|
| +# to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
|
| +* compare
|
| +<1 \u0f71\u0f73 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
|
| += ③①
|
| +
|
| +** test: discontiguous contractions with nested contractions
|
| +* compare
|
| +<1 a\u0323\u0308\u0301\u0358
|
| += Ⓕ❹
|
| +<2 a\u0323\u0308\u0301\u0308\u0301\u0358
|
| += Ⓕ❹❹
|
| +
|
| +** test: discontiguous contractions with interleaved contractions
|
| +* compare
|
| +# a+ring & cedilla & macron below+dot above right
|
| +<1 a\u0327\u0331\u030a\u0358
|
| += Ⓓ❺❽
|
| +
|
| +# a+ring & 1x..3x macron below+dot above right
|
| +<2 a\u0331\u030a\u0358
|
| += Ⓓ❽
|
| +<2 a\u0331\u0331\u030a\u0358\u0358
|
| += Ⓓ❽❽
|
| +# also skips acute
|
| +<2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
|
| += Ⓓ❽❽❽❶
|
| +
|
| +# a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
|
| +<1 a\U0001D165\u0323\U0001D16Ddz\u0301
|
| += Ⓔ³ⓓ
|
| +
|
| +** test: some simple string comparisons
|
| +@ root
|
| +* compare
|
| +# first string compares against ""
|
| += \u0000
|
| +< a
|
| +<1 b
|
| +<3 B
|
| += \u0000B\u0000
|
| +
|
| +** test: compare with strength=primary
|
| +% strength=primary
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| += B
|
| +
|
| +** test: compare with strength=secondary
|
| +% strength=secondary
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| += B
|
| +
|
| +** test: compare with strength=tertiary
|
| +% strength=tertiary
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<3 B
|
| +
|
| +** test: compare with strength=quaternary
|
| +% strength=quaternary
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<3 B
|
| +
|
| +** test: compare with strength=identical
|
| +% strength=identical
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<3 B
|
| +
|
| +** test: côté with forwards secondary
|
| +@ root
|
| +* compare
|
| +<1 cote
|
| +<2 coté
|
| +<2 côte
|
| +<2 côté
|
| +
|
| +** test: côté with forwards secondary vs. U+FFFE merge separator
|
| +# Merged sort keys: On each level, any difference in the first segment
|
| +# must trump any further difference.
|
| +* compare
|
| +<1 cote\uFFFEcôté
|
| +<2 coté\uFFFEcôte
|
| +<2 côte\uFFFEcoté
|
| +<2 côté\uFFFEcote
|
| +
|
| +** test: côté with backwards secondary
|
| +% backwards=on
|
| +* compare
|
| +<1 cote
|
| +<2 côte
|
| +<2 coté
|
| +<2 côté
|
| +
|
| +** test: côté with backwards secondary vs. U+FFFE merge separator
|
| +# Merged sort keys: On each level, any difference in the first segment
|
| +# must trump any further difference.
|
| +* compare
|
| +<1 cote\uFFFEcôté
|
| +<2 côte\uFFFEcoté
|
| +<2 coté\uFFFEcôte
|
| +<2 côté\uFFFEcote
|
| +
|
| +** test: U+FFFE on identical level
|
| +@ root
|
| +% strength=identical
|
| +* compare
|
| +# All of these control codes are completely-ignorable, so that
|
| +# their low code points are compared with the merge separator.
|
| +# The merge separator must compare less than any other character.
|
| +<1 \uFFFE\u0001\u0002\u0003
|
| +<i \u0001\uFFFE\u0002\u0003
|
| +<i \u0001\u0002\uFFFE\u0003
|
| +<i \u0001\u0002\u0003\uFFFE
|
| +
|
| +* compare
|
| +# The merge separator must even compare less than U+0000.
|
| +<1 \uFFFE\u0000\u0000
|
| +<i \u0000\uFFFE\u0000
|
| +<i \u0000\u0000\uFFFE
|
| +
|
| +** test: Hani < surrogates < U+FFFD
|
| +# Note: compareUTF8() treats unpaired surrogates like U+FFFD,
|
| +# so with that the strings with surrogates will compare equal to each other
|
| +# and equal to the string with U+FFFD.
|
| +@ root
|
| +% strength=identical
|
| +* compare
|
| +<1 abz
|
| +<1 a\u4e00z
|
| +<1 a\U00020000z
|
| +<1 a\ud800z
|
| +<1 a\udbffz
|
| +<1 a\udc00z
|
| +<1 a\udfffz
|
| +<1 a\ufffdz
|
| +
|
| +** test: script reordering
|
| +@ root
|
| +% reorder Hani Zzzz digit
|
| +* compare
|
| +<1 ?
|
| +<1 +
|
| +<1 丂
|
| +<1 a
|
| +<1 α
|
| +<1 5
|
| +
|
| +% reorder default
|
| +* compare
|
| +<1 ?
|
| +<1 +
|
| +<1 5
|
| +<1 a
|
| +<1 α
|
| +<1 丂
|
| +
|
| +** test: empty rules
|
| +@ rules
|
| +* compare
|
| +<1 a
|
| +<2 ä
|
| +<3 Ä
|
| +<1 b
|
| +
|
| +** test: very simple rules
|
| +@ rules
|
| +&a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
|
| +% strength=quaternary
|
| +* compare
|
| +<1 a
|
| += e
|
| +<4 q
|
| +<4 r
|
| +<1 x
|
| +<3 X
|
| +<2 y
|
| +<3 Y
|
| +<2 z
|
| +<3 Z
|
| +
|
| +** test: tailoring twice before a root position: primary
|
| +@ rules
|
| +&[before 1]b<p
|
| +&[before 1]b<q
|
| +* compare
|
| +<1 a
|
| +<1 p
|
| +<1 q
|
| +<1 b
|
| +
|
| +** test: tailoring twice before a root position: secondary
|
| +@ rules
|
| +&[before 2]ſ<<p
|
| +&[before 2]ſ<<q
|
| +* compare
|
| +<1 s
|
| +<2 p
|
| +<2 q
|
| +<2 ſ
|
| +
|
| +# secondary-before common weight
|
| +@ rules
|
| +&[before 2]b<<p
|
| +&[before 2]b<<q
|
| +* compare
|
| +<1 a
|
| +<1 p
|
| +<2 q
|
| +<2 b
|
| +
|
| +** test: tailoring twice before a root position: tertiary
|
| +@ rules
|
| +&[before 3]B<<<p
|
| +&[before 3]B<<<q
|
| +* compare
|
| +<1 b
|
| +<3 p
|
| +<3 q
|
| +<3 B
|
| +
|
| +# tertiary-before common weight
|
| +@ rules
|
| +&[before 3]b<<<p
|
| +&[before 3]b<<<q
|
| +* compare
|
| +<1 a
|
| +<1 p
|
| +<3 q
|
| +<3 b
|
| +
|
| +@ rules
|
| +&[before 2]b<<s
|
| +&[before 3]s<<<p
|
| +&[before 3]s<<<q
|
| +* compare
|
| +<1 a
|
| +<1 p
|
| +<3 q
|
| +<3 s
|
| +<2 b
|
| +
|
| +** test: tailor after completely ignorable
|
| +@ rules
|
| +&\x00<<<x<<y
|
| +* compare
|
| += \x00
|
| += \x1F
|
| +<3 x
|
| +<2 y
|
| +
|
| +** test: secondary tailoring gaps, ICU ticket 9362
|
| +@ rules
|
| +&[before 2]s<<'_'
|
| +&s<<r # secondary between s and ſ (long s)
|
| +&ſ<<*a-q # more than 15 between ſ and secondary CE boundary
|
| +&[before 2][first primary ignorable]<<u<<v # between secondary CE boundary & lowest secondary CE
|
| +&[last primary ignorable]<<y<<z
|
| +
|
| +* compare
|
| +<2 u
|
| +<2 v
|
| +<2 \u0332 # lowest secondary CE
|
| +<2 \u0308
|
| +<2 y
|
| +<2 z
|
| +<1 s_
|
| +<2 ss
|
| +<2 sr
|
| +<2 sſ
|
| +<2 sa
|
| +<2 sb
|
| +<2 sp
|
| +<2 sq
|
| +<2 sus
|
| +<2 svs
|
| +<2 rs
|
| +
|
| +** test: tertiary tailoring gaps, ICU ticket 9362
|
| +@ rules
|
| +&[before 3]t<<<'_'
|
| +&t<<<r # tertiary between t and fullwidth t
|
| +&ᵀ<<<*a-q # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
|
| +&[before 3][first secondary ignorable]<<<u<<<v # between tertiary CE boundary & lowest tertiary CE
|
| +&[last secondary ignorable]<<<y<<<z
|
| +
|
| +* compare
|
| +<3 u
|
| +<3 v
|
| +# Note: The root collator currently does not map any characters to tertiary CEs.
|
| +<3 y
|
| +<3 z
|
| +<1 t_
|
| +<3 tt
|
| +<3 tr
|
| +<3 t\uFF54 # t + fullwidth t (U+FF54)
|
| +<3 tᵀ
|
| +<3 ta
|
| +<3 tb
|
| +<3 tp
|
| +<3 tq
|
| +<3 tut
|
| +<3 tvt
|
| +<3 rt
|
| +
|
| +** test: secondary & tertiary around root character
|
| +@ rules
|
| +&[before 2]m<<r
|
| +&m<<s
|
| +&[before 3]m<<<u
|
| +&m<<<v
|
| +* compare
|
| +<1 l
|
| +<1 r
|
| +<2 u
|
| +<3 m
|
| +<3 v
|
| +<2 s
|
| +<1 n
|
| +
|
| +** test: secondary & tertiary around tailored item
|
| +@ rules
|
| +&m<x
|
| +&[before 2]x<<r
|
| +&x<<s
|
| +&[before 3]x<<<u
|
| +&x<<<v
|
| +* compare
|
| +<1 m
|
| +<1 r
|
| +<2 u
|
| +<3 x
|
| +<3 v
|
| +<2 s
|
| +<1 n
|
| +
|
| +** test: more nesting of secondary & tertiary before
|
| +@ rules
|
| +&[before 3]m<<<u
|
| +&[before 2]m<<r
|
| +&[before 3]r<<<q
|
| +&m<<<w
|
| +&m<<t
|
| +&[before 3]w<<<v
|
| +&w<<<x
|
| +&w<<s
|
| +* compare
|
| +<1 l
|
| +<1 q
|
| +<3 r
|
| +<2 u
|
| +<3 m
|
| +<3 v
|
| +<3 w
|
| +<3 x
|
| +<2 s
|
| +<2 t
|
| +<1 n
|
| +
|
| +** test: case bits
|
| +@ rules
|
| +&w<x # tailored CE getting case bits
|
| + =uv=uV=Uv=UV # 2 chars -> 1 CE
|
| +&ae=ch=cH=Ch=CH # 2 chars -> 2 CEs
|
| +&rst=yz=yZ=Yz=YZ # 2 chars -> 3 CEs
|
| +% caseFirst=lower
|
| +* compare
|
| +<1 ae
|
| += ch
|
| +<3 cH
|
| +<3 Ch
|
| +<3 CH
|
| +<1 rst
|
| += yz
|
| +<3 yZ
|
| +<3 Yz
|
| +<3 YZ
|
| +<1 w
|
| +<1 x
|
| += uv
|
| +<3 uV
|
| += Uv # mixed case on single CE cannot distinguish variations
|
| +<3 UV
|
| +
|
| +** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
|
| +@ rules
|
| +&\u0001<<<t<<<T # tertiary CEs
|
| +% caseFirst=lower
|
| +* compare
|
| +<1 aa
|
| +<3 aat
|
| +<3 aaT
|
| +<3 aA
|
| +<3 aAt
|
| +<3 ata
|
| +<3 aTa
|
| +
|
| +** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
|
| +% caseFirst=upper
|
| +* compare
|
| +<1 aA
|
| +<3 aAt
|
| +<3 aa
|
| +<3 aat
|
| +<3 aaT
|
| +<3 ata
|
| +<3 aTa
|
| +
|
| +** test: reset on expansion, ICU tickets 9415 & 9593
|
| +@ rules
|
| +&æ<x # tailor the last primary CE so that x sorts between ae and af
|
| +&æb=bæ # copy all reset CEs to make bæ sort the same
|
| +&각<h # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
|
| +&⒀<<y # copy/tailor 4 CEs to make y sort with only a secondary difference
|
| +&l·=z # handle the pre-context for · when fetching reset CEs
|
| + <<u # copy/tailor 2 CEs
|
| +
|
| +* compare
|
| +<1 ae
|
| +<2 æ
|
| +<1 x
|
| +<1 af
|
| +
|
| +* compare
|
| +<1 aeb
|
| +<2 æb
|
| += bæ
|
| +
|
| +* compare
|
| +<1 각
|
| +<1 h
|
| +<1 갂
|
| +<1 갃
|
| +
|
| +* compare
|
| +<1 · # by itself: primary CE
|
| +<1 l
|
| +<2 l· # l+middle dot has only a secondary difference from l
|
| += z
|
| +<2 u
|
| +
|
| +* compare
|
| +<1 (13)
|
| +<3 ⒀ # DUCET sets special tertiary weights in all CEs
|
| +<2 y
|
| +<1 (13[
|
| +
|
| +% alternate=shifted
|
| +* compare
|
| +<1 (13)
|
| += 13
|
| +<3 ⒀
|
| += y # alternate=shifted removes the tailoring difference on the last CE
|
| +<1 14
|
| +
|
| +** test: contraction inside extension, ICU ticket 9378
|
| +@ rules
|
| +&а<<х/й # all letters are Cyrillic
|
| +* compare
|
| +<1 ай
|
| +<2 х
|
| +
|
| +** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
|
| +@ rules
|
| +&t<x &ᵀ<y # same primary weights
|
| +&q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
|
| +* compare
|
| +<1 q
|
| +<1 u
|
| +<1 v
|
| +<1 ꝗ
|
| +<1 t
|
| +<3 ᵀ
|
| +<1 y
|
| +<1 x
|
| +
|
| +# Principle: Each rule builds on the state of preceding rules and ignores following rules.
|
| +
|
| +** test: later rule does not affect earlier reset position, ICU ticket 10105
|
| +@ rules
|
| +&a < u < v < w &ov < x &b < v
|
| +* compare
|
| +<1 oa
|
| +<1 ou
|
| +<1 x # CE(o) followed by CE between u and w
|
| +<1 ow
|
| +<1 ob
|
| +<1 ov
|
| +
|
| +** test: later rule does not affect earlier extension (1), ICU ticket 10105
|
| +@ rules
|
| +&a=x/b &v=b
|
| +% strength=secondary
|
| +* compare
|
| +<1 B
|
| +<1 c
|
| +<1 v
|
| += b
|
| +* compare
|
| +<1 AB
|
| += x
|
| +<1 ac
|
| +<1 av
|
| += ab
|
| +
|
| +** test: later rule does not affect earlier extension (2), ICU ticket 10105
|
| +@ rules
|
| +&a <<< c / e &g <<< e / l
|
| +% strength=secondary
|
| +* compare
|
| +<1 AE
|
| += c
|
| +<2 æ
|
| +<1 agl
|
| += ae
|
| +
|
| +** test: later rule does not affect earlier extension (3), ICU ticket 10105
|
| +@ rules
|
| +&a = b / c &d = c / e
|
| +% strength=secondary
|
| +* compare
|
| +<1 AC # C is still only tertiary different from the original c
|
| += b
|
| +<1 ade
|
| += ac
|
| +
|
| +** test: extension contains tailored character, ICU ticket 10105
|
| +@ rules
|
| +&a=e &b=u/e
|
| +* compare
|
| +<1 a
|
| += e
|
| +<1 ba
|
| += be
|
| += u
|
| +
|
| +** test: add simple mappings for characters with root context
|
| +@ rules
|
| +&z=· # middle dot has a prefix mapping in the CLDR root
|
| +&n=и # и (U+0438) has contractions in the root
|
| +* compare
|
| +<1 l
|
| +<2 l· # root mapping for l|· still works
|
| +<1 z
|
| += ·
|
| +* compare
|
| +<1 n
|
| += и
|
| +<1 И
|
| +<1 и\u0306 # root mapping for й=и\u0306 still works
|
| += й
|
| +<3 Й
|
| +
|
| +** test: add context mappings around characters with root context
|
| +@ rules
|
| +&z=·h # middle dot has a prefix mapping in the CLDR root
|
| +&n=ә|и # и (U+0438) has contractions in the root
|
| +* compare
|
| +<1 l
|
| +<2 l· # root mapping for l|· still works
|
| +<1 z
|
| += ·h
|
| +* compare
|
| +<1 и
|
| +<3 И
|
| +<1 и\u0306 # root mapping for й=и\u0306 still works
|
| += й
|
| +* compare
|
| +<1 әn
|
| += әи
|
| +<1 әo
|
| +
|
| +** test: many secondary CEs at the top of their range
|
| +@ rules
|
| +&[last primary ignorable]<<*\u2801-\u28ff
|
| +* compare
|
| +<2 \u0308
|
| +<2 \u2801
|
| +<2 \u2802
|
| +<2 \u2803
|
| +<2 \u2804
|
| +<2 \u28fd
|
| +<2 \u28fe
|
| +<2 \u28ff
|
| +<1 \x20
|
| +
|
| +** test: many tertiary CEs at the top of their range
|
| +@ rules
|
| +&[last secondary ignorable]<<<*a-z
|
| +* compare
|
| +<3 a
|
| +<3 b
|
| +<3 c
|
| +<3 d
|
| +# e..w
|
| +<3 x
|
| +<3 y
|
| +<3 z
|
| +<2 \u0308
|
| +
|
| +** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
|
| +@ rules
|
| +&a=p|x &b=px &c=op
|
| +* compare
|
| +<1 b
|
| += px
|
| +<3 B
|
| +<1 c
|
| += op
|
| +<3 C
|
| +* compare
|
| +<1 ca
|
| += opx # first contraction op, then prefix p|x
|
| +<3 cA
|
| +<3 Ca
|
| +
|
| +** test: reset position with prefix (pre-context), ICU ticket 10102
|
| +@ rules
|
| +&a=p|x &px=y
|
| +* compare
|
| +<1 pa
|
| += px
|
| += y
|
| +<3 pA
|
| +<1 q
|
| +<1 x
|
| +
|
| +** test: prefix+contraction together (1), ICU ticket 10071
|
| +@ rules
|
| +&x=a|bc
|
| +* compare
|
| +<1 ab
|
| +<1 Abc
|
| +<1 abd
|
| +<1 ac
|
| +<1 aw
|
| +<1 ax
|
| += abc
|
| +<3 aX
|
| +<3 Ax
|
| +<1 b
|
| +<1 bb
|
| +<1 bc
|
| +<3 bC
|
| +<3 Bc
|
| +<1 bd
|
| +
|
| +** test: prefix+contraction together (2), ICU ticket 10071
|
| +@ rules
|
| +&w=bc &x=a|b
|
| +* compare
|
| +<1 w
|
| += bc
|
| +<3 W
|
| +* compare
|
| +<1 aw
|
| +<1 ax
|
| += ab
|
| +<3 aX
|
| +<1 axb
|
| +<1 axc
|
| += abc # prefix match a|b takes precedence over contraction match bc
|
| +<3 abC
|
| +<1 abd
|
| +<1 ay
|
| +
|
| +** test: prefix+contraction together (3), ICU ticket 10071
|
| +@ rules
|
| +&x=a|b &w=bc # same rules as in the previous test but in reverse order, which should not matter here
|
| +* compare # same "compare" sequences as previous test
|
| +<1 w
|
| += bc
|
| +<3 W
|
| +* compare
|
| +<1 aw
|
| +<1 ax
|
| += ab
|
| +<3 aX
|
| +<1 axb
|
| +<1 axc
|
| += abc # prefix match a|b takes precedence over contraction match bc
|
| +<3 abC
|
| +<1 abd
|
| +<1 ay
|
| +
|
| +** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
|
| +@ rules
|
| +&d=ch &v=p|ci
|
| +* compare
|
| +<1 pc
|
| +<3 pC
|
| +<1 pcH
|
| +<1 pcI
|
| +<1 pd
|
| += pch # no-prefix contraction ch matches
|
| +<3 pD
|
| +<1 pv
|
| += pci # prefix+contraction p|ci matches
|
| +<3 pV
|
| +
|
| +** test: tailor in & around compact ranges of root primaries
|
| +# The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
|
| +# which should be reliably encoded as one range in the root elements data.
|
| +@ rules
|
| +&[before 1]ᚁ<a
|
| +&ᚁ<b
|
| +&[before 1]ᚂ<c
|
| +&ᚂ<d
|
| +&[before 1]ᚚ<y
|
| +&ᚚ<z
|
| +&[before 2]ᚁ<<r
|
| +&ᚁ<<s
|
| +&[before 3]ᚚ<<<t
|
| +&ᚚ<<<u
|
| +* compare
|
| +<1 ᣵ # U+18F5 last Canadian Aboriginal
|
| +<1 a
|
| +<1 r
|
| +<2 ᚁ
|
| +<2 s
|
| +<1 b
|
| +<1 c
|
| +<1 ᚂ
|
| +<1 d
|
| +<1 ᚃ
|
| +<1 ᚙ
|
| +<1 y
|
| +<1 t
|
| +<3 ᚚ
|
| +<3 u
|
| +<1 z
|
| +<1 ᚠ # U+16A0 first Runic
|
| +
|
| +** test: suppressContractions
|
| +@ rules
|
| +&z<ch<әж [suppressContractions [·cә]]
|
| +* compare
|
| +<1 ch
|
| +<3 cH # ch was suppressed
|
| +<1 l
|
| +<1 l· # primary difference, not secondary, because l|· was suppressed
|
| +<1 ә
|
| +<2 ә\u0308 # secondary difference, not primary, because contractions for ә were suppressed
|
| +<1 әж
|
| +<3 әЖ
|
| +
|
| +** test: Hangul & Jamo
|
| +@ rules
|
| +&L=\u1100 # first Jamo L
|
| +&V=\u1161 # first Jamo V
|
| +&T=\u11A8 # first Jamo T
|
| +&\uAC01<<*\u4E00-\u4EFF # first Hangul LVT syllable & lots of secondary diffs
|
| +* compare
|
| +<1 Lv
|
| +<3 LV
|
| += \u1100\u1161
|
| += \uAC00
|
| +<1 LVt
|
| +<3 LVT
|
| += \u1100\u1161\u11A8
|
| += \uAC00\u11A8
|
| += \uAC01
|
| +<2 LVT\u0308
|
| +<2 \u4E00
|
| +<2 \u4E01
|
| +<2 \u4E80
|
| +<2 \u4EFF
|
| +<2 LV\u0308T
|
| +<1 \uAC02
|
| +
|
| +** test: adjust special reset positions according to previous rules, CLDR ticket 6070
|
| +@ rules
|
| +&[last variable]<x
|
| +[maxVariable space] # has effect only after building, no effect on following rules
|
| +&[last variable]<y
|
| +&[before 1][first regular]<z
|
| +* compare
|
| +<1 ? # some punctuation
|
| +<1 x
|
| +<1 y
|
| +<1 z
|
| +<1 $ # some symbol
|
| +
|
| +@ rules
|
| +&[last primary ignorable]<<x<<<y
|
| +&[last primary ignorable]<<z
|
| +* compare
|
| +<2 \u0358
|
| +<2 x
|
| +<3 y
|
| +<2 z
|
| +<1 \x20
|
| +
|
| +@ rules
|
| +&[last secondary ignorable]<<<x
|
| +&[last secondary ignorable]<<<y
|
| +* compare
|
| +<3 x
|
| +<3 y
|
| +<2 \u0358
|
| +
|
| +@ rules
|
| +&[before 2][first variable]<<z
|
| +&[before 2][first variable]<<y
|
| +&[before 3][first variable]<<<x
|
| +&[before 3][first variable]<<<w
|
| +&[before 1][first variable]<v
|
| +&[before 2][first variable]<<u
|
| +&[before 3][first variable]<<<t
|
| +&[before 2]\uFDD1\xA0<<s # FractionalUCA.txt: FDD1 00A0, SPACE first primary
|
| +* compare
|
| +<2 \u0358
|
| +<1 s
|
| +<2 \uFDD1\xA0
|
| +<1 t
|
| +<3 u
|
| +<2 v
|
| +<1 w
|
| +<3 x
|
| +<3 y
|
| +<2 z
|
| +<2 \t
|
| +
|
| +@ rules
|
| +&[before 2][first regular]<<z
|
| +&[before 3][first regular]<<<y
|
| +&[before 1][first regular]<x
|
| +&[before 3][first regular]<<<w
|
| +&[before 2]\uFDD1\u263A<<v # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
|
| +&[before 3][first regular]<<<u
|
| +&[before 1][first regular]<p # primary before the boundary: becomes variable
|
| +&[before 3][first regular]<<<t # not affected by p
|
| +&[last variable]<q # after p!
|
| +* compare
|
| +<1 ?
|
| +<1 p
|
| +<1 q
|
| +<1 t
|
| +<3 u
|
| +<3 v
|
| +<1 w
|
| +<3 x
|
| +<1 y
|
| +<3 z
|
| +<1 $
|
| +
|
| +# check that p & q are indeed variable
|
| +% alternate=shifted
|
| +* compare
|
| += ?
|
| += p
|
| += q
|
| +<1 t
|
| +<3 u
|
| +<3 v
|
| +<1 w
|
| +<3 x
|
| +<1 y
|
| +<3 z
|
| +<1 $
|
| +
|
| +@ rules
|
| +&[before 2][first trailing]<<z
|
| +&[before 1][first trailing]<y
|
| +&[before 3][first trailing]<<<x
|
| +* compare
|
| +<1 \u4E00 # first Han, first implicit
|
| +<1 \uFDD1\uFDD0 # FractionalUCA.txt: unassigned first primary
|
| +# Note: The root collator currently does not map any characters to the trailing first boundary primary.
|
| +<1 x
|
| +<3 y
|
| +<1 z
|
| +<2 \uFFFD # The root collator currently maps U+FFFD to the first real trailing primary.
|
| +
|
| +@ rules
|
| +&[before 2][first primary ignorable]<<z
|
| +&[before 2][first primary ignorable]<<y
|
| +&[before 3][first primary ignorable]<<<x
|
| +&[before 3][first primary ignorable]<<<w
|
| +* compare
|
| += \x01
|
| +<2 w
|
| +<3 x
|
| +<3 y
|
| +<2 z
|
| +<2 \u0301
|
| +
|
| +@ rules
|
| +&[before 3][first secondary ignorable]<<<y
|
| +&[before 3][first secondary ignorable]<<<x
|
| +* compare
|
| += \x01
|
| +<3 x
|
| +<3 y
|
| +<2 \u0301
|
| +
|
| +** test: canonical closure
|
| +@ rules
|
| +&X=A &U=Â
|
| +* compare
|
| +<1 U
|
| += Â
|
| += A\u0302
|
| +<2 Ú # U with acute
|
| += U\u0301
|
| += Ấ # A with circumflex & acute
|
| += Â\u0301
|
| += A\u0302\u0301
|
| +<1 X
|
| += A
|
| +<2 X\u030A # with ring above
|
| += Å
|
| += A\u030A
|
| += \u212B # Angstrom sign
|
| +
|
| +@ rules
|
| +&x=\u5140\u55C0
|
| +* compare
|
| +<1 x
|
| += \u5140\u55C0
|
| += \u5140\uFA0D
|
| += \uFA0C\u55C0
|
| += \uFA0C\uFA0D # CJK compatibility characters
|
| +<3 X
|
| +
|
| +# canonical closure on prefix rules, ICU ticket 9444
|
| +@ rules
|
| +&x=ä|ŝ
|
| +* compare
|
| +<1 äs # not tailored
|
| +<1 äx
|
| += äŝ
|
| += a\u0308s\u0302
|
| += a\u0308ŝ
|
| += äs\u0302
|
| +<3 äX
|
| +
|
| +** test: conjoining Jamo map to expansions
|
| +@ rules
|
| +&gg=\u1101 # Jamo Lead consonant GG
|
| +&nj=\u11AC # Jamo Trail consonant NJ
|
| +* compare
|
| +<1 gg\u1161nj
|
| += \u1101\u1161\u11AC
|
| += \uAE4C\u11AC
|
| += \uAE51
|
| +<3 gg\u1161nJ
|
| +<1 \u1100\u1100
|
| +
|
| +** test: canonical tail closure, ICU ticket 5913
|
| +@ rules
|
| +&a<â
|
| +* compare
|
| +<1 a
|
| +<1 â # tailored
|
| += a\u0302
|
| +<2 a\u0323\u0302 # discontiguous contraction
|
| += ạ\u0302 # equivalent
|
| += ậ # equivalent
|
| +<1 b
|
| +
|
| +@ rules
|
| +&a<ạ
|
| +* compare
|
| +<1 a
|
| +<1 ạ # tailored
|
| += a\u0323
|
| +<2 a\u0323\u0302 # contiguous contraction plus extra diacritic
|
| += ạ\u0302 # equivalent
|
| += ậ # equivalent
|
| +<1 b
|
| +
|
| +# Tail closure should work even if there is a prefix and/or contraction.
|
| +@ rules
|
| +&a<\u5140|câ
|
| +# In order to find discontiguous contractions for \u5140|câ
|
| +# there must exist a mapping for \u5140|ca, regardless of what it maps to.
|
| +# (This follows from the UCA spec.)
|
| +&x=\u5140|ca
|
| +* compare
|
| +<1 \u5140a
|
| += \uFA0Ca
|
| +<1 \u5140câ # tailored
|
| += \uFA0Ccâ
|
| += \u5140ca\u0302
|
| += \uFA0Cca\u0302
|
| +<2 \u5140ca\u0323\u0302 # discontiguous contraction
|
| += \uFA0Cca\u0323\u0302
|
| += \u5140cạ\u0302
|
| += \uFA0Ccạ\u0302
|
| += \u5140cậ
|
| += \uFA0Ccậ
|
| +<1 \u5140b
|
| += \uFA0Cb
|
| +<1 \u5140x
|
| += \u5140ca
|
| +
|
| +# Double-check that without the extra mapping there will be no discontiguous match.
|
| +@ rules
|
| +&a<\u5140|câ
|
| +* compare
|
| +<1 \u5140a
|
| += \uFA0Ca
|
| +<1 \u5140câ # tailored
|
| += \uFA0Ccâ
|
| += \u5140ca\u0302
|
| += \uFA0Cca\u0302
|
| +<1 \u5140b
|
| += \uFA0Cb
|
| +<1 \u5140ca\u0323\u0302 # no discontiguous contraction
|
| += \uFA0Cca\u0323\u0302
|
| += \u5140cạ\u0302
|
| += \uFA0Ccạ\u0302
|
| += \u5140cậ
|
| += \uFA0Ccậ
|
| +
|
| +@ rules
|
| +&a<cạ
|
| +* compare
|
| +<1 a
|
| +<1 cạ # tailored
|
| += ca\u0323
|
| +<2 ca\u0323\u0302 # contiguous contraction plus extra diacritic
|
| += cạ\u0302 # equivalent
|
| += cậ # equivalent
|
| +<1 b
|
| +
|
| +# ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
| +# = 03C9 0313 0300 0345
|
| +# ccc = 0, 230, 230, 240
|
| +@ rules
|
| +&δ=αῳ
|
| +# In order to find discontiguous contractions for αῳ
|
| +# there must exist a mapping for αω, regardless of what it maps to.
|
| +# (This follows from the UCA spec.)
|
| +&ε=αω
|
| +* compare
|
| +<1 δ
|
| += αῳ
|
| += αω\u0345
|
| +<2 αω\u0313\u0300\u0345 # discontiguous contraction
|
| += αὠ\u0300\u0345
|
| += αὢ\u0345
|
| += αᾢ
|
| +<2 αω\u0300\u0313\u0345
|
| += αὼ\u0313\u0345
|
| += αῲ\u0313 # not FCD
|
| +<1 ε
|
| += αω
|
| +
|
| +# Double-check that without the extra mapping there will be no discontiguous match.
|
| +@ rules
|
| +&δ=αῳ
|
| +* compare
|
| +<1 αω\u0313\u0300\u0345 # no discontiguous contraction
|
| += αὠ\u0300\u0345
|
| += αὢ\u0345
|
| += αᾢ
|
| +<2 αω\u0300\u0313\u0345
|
| += αὼ\u0313\u0345
|
| += αῲ\u0313 # not FCD
|
| +<1 δ
|
| += αῳ
|
| += αω\u0345
|
| +
|
| +# Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
|
| +# Tests code paths where the tailored string has a combining mark
|
| +# that does not occur in any composite's decomposition.
|
| +@ rules
|
| +&δ=αὼ\u0315
|
| +* compare
|
| +<1 αω\u0313\u0300\u0315 # Not tailored: The grave accent blocks the comma above.
|
| += αὠ\u0300\u0315
|
| += αὢ\u0315
|
| +<1 δ
|
| += αὼ\u0315
|
| += αω\u0300\u0315
|
| +<2 αω\u0300\u0315\u0345
|
| += αὼ\u0315\u0345
|
| += αῲ\u0315 # not FCD
|
| +
|
| +** test: danish a+a vs. a-umlaut, ICU ticket 9319
|
| +@ rules
|
| +&z<aa
|
| +* compare
|
| +<1 z
|
| +<1 aa
|
| +<2 aa\u0308
|
| += aä
|
| +
|
| +** test: Jamo L with and in prefix
|
| +# Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
|
| +@ rules
|
| +# Jamo Lead consonant G after G or GG
|
| +&[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
|
| +# Jamo Lead consonant GG sorts like G+G
|
| +&\u1100\u1100=\u1101
|
| +# Note: Making G|GG and GG|GG sort the same as G|G+G
|
| +# would require the ability to reset on G|G+G,
|
| +# or we could make G-after-G equal to some secondary-CE character,
|
| +# and reset on a pair of those.
|
| +# (It does not matter much if there are at most two G in a row in real text.)
|
| +* compare
|
| +<1 \u1100
|
| +<2 \u1100\u1100 # only one primary from a sequence of G lead consonants
|
| += \u1101
|
| +<2 \u1100\u1100\u1100
|
| += \u1101\u1100
|
| +# but not = \u1100\u1101, see above
|
| +<1 \u1100\u1161
|
| += \uAC00
|
| +<2 \u1100\u1100\u1161
|
| += \u1100\uAC00 # prefix match from the L of the LV syllable
|
| += \u1101\u1161
|
| += \uAE4C
|
| +
|
| +** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
|
| +@ rules
|
| +# Low secondary CEs for Jamo V & T.
|
| +# Note: T should sort before V for proper syllable order.
|
| +&\u0332 # COMBINING LOW LINE (first primary ignorable)
|
| +<<\u1161<<\u1162
|
| +
|
| +# Korean Jamo lead consonant search rules, part 2:
|
| +# Make modern compound L jamo primary equivalent to non-compound forms.
|
| +
|
| +# Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
|
| +&\u0313 # COMBINING COMMA ABOVE (second primary ignorable)
|
| +=\u1100|\u1100
|
| +=\u1103|\u1103
|
| +=\u1107|\u1107
|
| +=\u1109|\u1109
|
| +=\u110C|\u110C
|
| +
|
| +# Compound L Jamo map to equivalent expansions of primary+secondary CE.
|
| +&\u1100\u0313=\u1101<<<\u3132 # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
|
| +&\u1103\u0313=\u1104<<<\u3138 # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
|
| +&\u1107\u0313=\u1108<<<\u3143 # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
|
| +&\u1109\u0313=\u110A<<<\u3146 # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
|
| +&\u110C\u0313=\u110D<<<\u3149 # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
|
| +
|
| +* compare
|
| +<1 \u1100\u1161
|
| += \uAC00
|
| +<2 \u1100\u1162
|
| += \uAC1C
|
| +<2 \u1100\u1100\u1161
|
| += \u1100\uAC00
|
| += \u1101\u1161
|
| += \uAE4C
|
| +<3 \u3132\u1161
|
| +
|
| +** test: Hangul syllables in prefix & in the interior of a contraction
|
| +@ rules
|
| +&x=\u1100\u1161|a\u1102\u1162z
|
| +* compare
|
| +<1 \u1100\u1161x
|
| += \u1100\u1161a\u1102\u1162z
|
| += \u1100\u1161a\uB0B4z
|
| += \uAC00a\u1102\u1162z
|
| += \uAC00a\uB0B4z
|
| +
|
| +** test: digits are unsafe-backwards when numeric=on
|
| +@ root
|
| +% numeric=on
|
| +* compare
|
| +# If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
|
| +# We need to back up before the identical prefix "1" and compare the full numbers.
|
| +<1 11b
|
| +<1 101a
|
| +
|
| +** test: simple locale data test
|
| +@ locale de
|
| +* compare
|
| +<1 a
|
| +<2 ä
|
| +<1 ae
|
| +<2 æ
|
| +
|
| +@ locale de-u-co-phonebk
|
| +* compare
|
| +<1 a
|
| +<1 ae
|
| +<2 ä
|
| +<2 æ
|
| +
|
| +# The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
|
| +
|
| +** test: DataDrivenCollationTest/TestMorePinyin
|
| +# Testing the primary strength.
|
| +@ locale zh
|
| +% strength=primary
|
| +* compare
|
| +< lā
|
| += lĀ
|
| += Lā
|
| += LĀ
|
| +< lān
|
| += lĀn
|
| +< lē
|
| += lĒ
|
| += Lē
|
| += LĒ
|
| +< lēn
|
| += lĒn
|
| +
|
| +** test: DataDrivenCollationTest/TestLithuanian
|
| +# Lithuanian sort order.
|
| +@ locale lt
|
| +* compare
|
| +< cz
|
| +< č
|
| +< d
|
| +< iz
|
| +< j
|
| +< sz
|
| +< š
|
| +< t
|
| +< zz
|
| +< ž
|
| +
|
| +** test: DataDrivenCollationTest/TestLatvian
|
| +# Latvian sort order.
|
| +@ locale lv
|
| +* compare
|
| +< cz
|
| +< č
|
| +< d
|
| +< gz
|
| +< ģ
|
| +< h
|
| +< iz
|
| +< j
|
| +< kz
|
| +< ķ
|
| +< l
|
| +< lz
|
| +< ļ
|
| +< m
|
| +< nz
|
| +< ņ
|
| +< o
|
| +< rz
|
| +< ŗ
|
| +< s
|
| +< sz
|
| +< š
|
| +< t
|
| +< zz
|
| +< ž
|
| +
|
| +** test: DataDrivenCollationTest/TestEstonian
|
| +# Estonian sort order.
|
| +@ locale et
|
| +* compare
|
| +< sy
|
| +< š
|
| +< šy
|
| +< z
|
| +< zy
|
| +< ž
|
| +< v
|
| +< va
|
| +< w
|
| +< õ
|
| +< õy
|
| +< ä
|
| +< äy
|
| +< ö
|
| +< öy
|
| +< ü
|
| +< üy
|
| +< x
|
| +
|
| +** test: DataDrivenCollationTest/TestAlbanian
|
| +# Albanian sort order.
|
| +@ locale sq
|
| +* compare
|
| +< cz
|
| +< ç
|
| +< d
|
| +< dz
|
| +< dh
|
| +< e
|
| +< ez
|
| +< ë
|
| +< f
|
| +< gz
|
| +< gj
|
| +< h
|
| +< lz
|
| +< ll
|
| +< m
|
| +< nz
|
| +< nj
|
| +< o
|
| +< rz
|
| +< rr
|
| +< s
|
| +< sz
|
| +< sh
|
| +< t
|
| +< tz
|
| +< th
|
| +< u
|
| +< xz
|
| +< xh
|
| +< y
|
| +< zz
|
| +< zh
|
| +
|
| +** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
|
| +# Sorted file has different order.
|
| +@ root
|
| +# normalization=on turned on & off automatically.
|
| +* compare
|
| +< \u5F20
|
| +< \u5F20\u4E00\u8E3F
|
| +
|
| +** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
|
| +# This pretty much crashes.
|
| +@ root
|
| +* compare
|
| +< \u0f71\u0f72\u0f80\u0f71\u0f72
|
| +< \u0f80
|
| +
|
| +** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
|
| +# These are examples of strings that caused trouble in partial sort key testing.
|
| +@ locale th-TH
|
| +* compare
|
| +< \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
|
| +< \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
|
| +* compare
|
| +< \u0E01\u0E07\u0E01\u0E32\u0E23
|
| +< \u0E01\u0E07\u0E42\u0E01\u0E49
|
| +* compare
|
| +< \u0E01\u0E23\u0E19\u0E17\u0E32
|
| +< \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
|
| +* compare
|
| +< \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
|
| +< \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
|
| +* compare
|
| +< \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
|
| +< \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
|
| +
|
| +** test: DataDrivenCollationTest/TestJavaStyleRule
|
| +# java.text allows rules to start as '<<<x<<<y...'
|
| +# we emulate this by assuming a &[first tertiary ignorable] in this case.
|
| +@ rules
|
| +&\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
|
| +* compare
|
| += a
|
| += equal
|
| +< z
|
| +< x
|
| += b # x had become the new first primary ignorable
|
| +< w
|
| +
|
| +** test: DataDrivenCollationTest/TestShiftedIgnorable
|
| +# The UCA states that primary ignorables should be completely
|
| +# ignorable when following a shifted code point.
|
| +@ root
|
| +% alternate=shifted
|
| +% strength=quaternary
|
| +* compare
|
| +< a\u0020b
|
| += a\u0020\u0300b
|
| += a\u0020\u0301b
|
| +< a_b
|
| += a_\u0300b
|
| += a_\u0301b
|
| +< A\u0020b
|
| += A\u0020\u0300b
|
| += A\u0020\u0301b
|
| +< A_b
|
| += A_\u0300b
|
| += A_\u0301b
|
| +< a\u0301b
|
| +< A\u0301b
|
| +< a\u0300b
|
| +< A\u0300b
|
| +
|
| +** test: DataDrivenCollationTest/TestNShiftedIgnorable
|
| +# The UCA states that primary ignorables should be completely
|
| +# ignorable when following a shifted code point.
|
| +# With alternate=non-ignorable nothing is shifted, so the accents below still matter.
|
| +@ root
|
| +% alternate=non-ignorable
|
| +% strength=tertiary
|
| +* compare
|
| +< a\u0020b
|
| +< A\u0020b
|
| +< a\u0020\u0301b
|
| +< A\u0020\u0301b
|
| +< a\u0020\u0300b
|
| +< A\u0020\u0300b
|
| +< a_b
|
| +< A_b
|
| +< a_\u0301b
|
| +< A_\u0301b
|
| +< a_\u0300b
|
| +< A_\u0300b
|
| +< a\u0301b
|
| +< A\u0301b
|
| +< a\u0300b
|
| +< A\u0300b
|
| +
|
| +** test: DataDrivenCollationTest/TestSafeSurrogates
|
| +# It turned out that surrogates were not skipped properly
|
| +# when iterating backwards if they were in the middle of a
|
| +# contraction. This test ensures that this is fixed.
|
| +@ rules
|
| +&a < x\ud800\udc00b
|
| +* compare
|
| +< a
|
| +< x\ud800\udc00b
|
| +
|
| +** test: DataDrivenCollationTest/da_TestPrimary
|
| +# This test goes through primary strength cases
|
| +@ locale da
|
| +% strength=primary
|
| +* compare
|
| +< Lvi
|
| +< Lwi
|
| +* compare
|
| +< L\u00e4vi
|
| +< L\u00f6wi
|
| +* compare
|
| +< L\u00fcbeck
|
| += Lybeck
|
| +
|
| +** test: DataDrivenCollationTest/da_TestTertiary
|
| +# This test goes through tertiary strength cases
|
| +@ locale da
|
| +% strength=tertiary
|
| +* compare
|
| +< Luc
|
| +< luck
|
| +* compare
|
| +< luck
|
| +< L\u00fcbeck
|
| +* compare
|
| +< lybeck
|
| +< L\u00fcbeck
|
| +* compare
|
| +< L\u00e4vi
|
| +< L\u00f6we
|
| +* compare
|
| +< L\u00f6ww
|
| +< mast
|
| +
|
| +* compare
|
| +< A/S
|
| +< ANDRE
|
| +< ANDR\u00c9
|
| +< ANDREAS
|
| +< AS
|
| +< CA
|
| +< \u00c7A
|
| +< CB
|
| +< \u00c7C
|
| +< D.S.B.
|
| +< DA
|
| +< \u00d0A
|
| +< DB
|
| +< \u00d0C
|
| +< DSB
|
| +< DSC
|
| +< EKSTRA_ARBEJDE
|
| +< EKSTRABUD0
|
| +< H\u00d8ST
|
| +< HAAG
|
| +< H\u00c5NDBOG
|
| +< HAANDV\u00c6RKSBANKEN
|
| +< Karl
|
| +< karl
|
| +< NIELS\u0020J\u00d8RGEN
|
| +< NIELS-J\u00d8RGEN
|
| +< NIELSEN
|
| +< R\u00c9E,\u0020A
|
| +< REE,\u0020B
|
| +< R\u00c9E,\u0020L
|
| +< REE,\u0020V
|
| +< SCHYTT,\u0020B
|
| +< SCHYTT,\u0020H
|
| +< SCH\u00dcTT,\u0020H
|
| +< SCHYTT,\u0020L
|
| +< SCH\u00dcTT,\u0020M
|
| +< SS
|
| +< \u00df
|
| +< SSA
|
| +< STORE\u0020VILDMOSE
|
| +< STOREK\u00c6R0
|
| +< STORM\u0020PETERSEN
|
| +< STORMLY
|
| +< THORVALD
|
| +< THORVARDUR
|
| +< \u00feORVAR\u00d0UR
|
| +< THYGESEN
|
| +< VESTERG\u00c5RD,\u0020A
|
| +< VESTERGAARD,\u0020A
|
| +< VESTERG\u00c5RD,\u0020B
|
| +< \u00c6BLE
|
| +< \u00c4BLE
|
| +< \u00d8BERG
|
| +< \u00d6BERG
|
| +
|
| +* compare
|
| +< andere
|
| +< chaque
|
| +< chemin
|
| +< cote
|
| +< cot\u00e9
|
| +< c\u00f4te
|
| +< c\u00f4t\u00e9
|
| +< \u010du\u010d\u0113t
|
| +< Czech
|
| +< hi\u0161a
|
| +< irdisch
|
| +< lie
|
| +< lire
|
| +< llama
|
| +< l\u00f5ug
|
| +< l\u00f2za
|
| +< lu\u010d
|
| +< luck
|
| +< L\u00fcbeck
|
| +< lye
|
| +< l\u00e4vi
|
| +< L\u00f6wen
|
| +< m\u00e0\u0161ta
|
| +< m\u00eer
|
| +< myndig
|
| +< M\u00e4nner
|
| +< m\u00f6chten
|
| +< pi\u00f1a
|
| +< pint
|
| +< pylon
|
| +< \u0161\u00e0ran
|
| +< savoir
|
| +< \u0160erb\u016bra
|
| +< Sietla
|
| +< \u015blub
|
| +< subtle
|
| +< symbol
|
| +< s\u00e4mtlich
|
| +< verkehrt
|
| +< vox
|
| +< v\u00e4ga
|
| +< waffle
|
| +< wood
|
| +< yen
|
| +< yuan
|
| +< yucca
|
| +< \u017eal
|
| +< \u017eena
|
| +< \u017den\u0113va
|
| +< zoo0
|
| +< Zviedrija
|
| +< Z\u00fcrich
|
| +< zysk0
|
| +< \u00e4ndere
|
| +
|
| +** test: DataDrivenCollationTest/hi_TestNewRules
|
| +# This test goes through new rules and tests against old rules
|
| +@ locale hi
|
| +* compare
|
| +< कॐ
|
| +< कं
|
| +< कँ
|
| +< कः
|
| +
|
| +** test: DataDrivenCollationTest/ro_TestNewRules
|
| +# This test goes through new rules and tests against old rules
|
| +@ locale ro
|
| +* compare
|
| +< xAx
|
| +< xă
|
| +< xĂ
|
| +< Xă
|
| +< XĂ
|
| +< xăx
|
| +< xĂx
|
| +< xâ
|
| +< xÂ
|
| +< Xâ
|
| +< XÂ
|
| +< xâx
|
| +< xÂx
|
| +< xb
|
| +< xIx
|
| +< xî
|
| +< xÎ
|
| +< Xî
|
| +< XÎ
|
| +< xîx
|
| +< xÎx
|
| +< xj
|
| +< xSx
|
| +< xș
|
| += xş
|
| +< xȘ
|
| += xŞ
|
| +< Xș
|
| += Xş
|
| +< XȘ
|
| += XŞ
|
| +< xșx
|
| += xşx
|
| +< xȘx
|
| += xŞx
|
| +< xT
|
| +< xTx
|
| +< xț
|
| += xţ
|
| +< xȚ
|
| += xŢ
|
| +< Xț
|
| += Xţ
|
| +< XȚ
|
| += XŢ
|
| +< xțx
|
| += xţx
|
| +< xȚx
|
| += xŢx
|
| +< xU
|
| +
|
| +** test: DataDrivenCollationTest/testOffsets
|
| +# This tests cases where forwards and backwards iteration get different offsets
|
| +@ locale en
|
| +% strength=tertiary
|
| +* compare
|
| +< a\uD800\uDC00\uDC00
|
| +< b\uD800\uDC00\uDC00
|
| +* compare
|
| +< \u0301A\u0301\u0301
|
| +< \u0301B\u0301\u0301
|
| +* compare
|
| +< abcd\r\u0301
|
| +< abce\r\u0301
|
| +# TODO: test offsets in new CollationTest
|
| +
|
| +# End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
|
| +
|
| +** test: was ICU 52 cmsccoll/TestRedundantRules
|
| +@ rules
|
| +& a < b < c < d& [before 1] c < m
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<1 m
|
| +<1 c
|
| +<1 d
|
| +
|
| +@ rules
|
| +& a < b <<< c << d <<< e& [before 3] e <<< x
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<3 c
|
| +<2 d
|
| +<3 x
|
| +<3 e
|
| +
|
| +@ rules
|
| +& a < b <<< c << d <<< e <<< f < g& [before 1] g < x
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<3 c
|
| +<2 d
|
| +<3 e
|
| +<3 f
|
| +<1 x
|
| +<1 g
|
| +
|
| +@ rules
|
| +& a <<< b << c < d& a < m
|
| +* compare
|
| +<1 a
|
| +<3 b
|
| +<2 c
|
| +<1 m
|
| +<1 d
|
| +
|
| +@ rules
|
| +&a<b<<b\u0301 &z<b
|
| +* compare
|
| +<1 a
|
| +<1 b\u0301
|
| +<1 z
|
| +<1 b
|
| +
|
| +@ rules
|
| +&z<m<<<q<<<m
|
| +* compare
|
| +<1 z
|
| +<1 q
|
| +<3 m
|
| +
|
| +@ rules
|
| +&z<<<m<q<<<m
|
| +* compare
|
| +<1 z
|
| +<1 q
|
| +<3 m
|
| +
|
| +@ rules
|
| +& a < b < c < d& r < c
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<1 d
|
| +<1 r
|
| +<1 c
|
| +
|
| +@ rules
|
| +& a < b < c < d& c < m
|
| +* compare
|
| +<1 a
|
| +<1 b
|
| +<1 c
|
| +<1 m
|
| +<1 d
|
| +
|
| +@ rules
|
| +& a < b < c < d& a < m
|
| +* compare
|
| +<1 a
|
| +<1 m
|
| +<1 b
|
| +<1 c
|
| +<1 d
|
| +
|
| +** test: was ICU 52 cmsccoll/TestExpansionSyntax
|
| +# The following two rules should sort the particular list of strings the same.
|
| +@ rules
|
| +&AE <<< a << b <<< c &d <<< f
|
| +* compare
|
| +<1 AE
|
| +<3 a
|
| +<2 b
|
| +<3 c
|
| +<1 d
|
| +<3 f
|
| +
|
| +@ rules
|
| +&A <<< a / E << b / E <<< c /E &d <<< f
|
| +* compare
|
| +<1 AE
|
| +<3 a
|
| +<2 b
|
| +<3 c
|
| +<1 d
|
| +<3 f
|
| +
|
| +# The following two rules should sort the particular list of strings the same.
|
| +@ rules
|
| +&AE <<< a <<< b << c << d < e < f <<< g
|
| +* compare
|
| +<1 AE
|
| +<3 a
|
| +<3 b
|
| +<2 c
|
| +<2 d
|
| +<1 e
|
| +<1 f
|
| +<3 g
|
| +
|
| +@ rules
|
| +&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
|
| +* compare
|
| +<1 AE
|
| +<3 a
|
| +<3 b
|
| +<2 c
|
| +<2 d
|
| +<1 e
|
| +<1 f
|
| +<3 g
|
| +
|
| +# The following two rules should sort the particular list of strings the same.
|
| +@ rules
|
| +&AE <<< B <<< C / D <<< F
|
| +* compare
|
| +<1 AE
|
| +<3 B
|
| +<3 F
|
| +<1 AED
|
| +<3 C
|
| +
|
| +@ rules
|
| +&A <<< B / E <<< C / ED <<< F / E
|
| +* compare
|
| +<1 AE
|
| +<3 B
|
| +<3 F
|
| +<1 AED
|
| +<3 C
|
| +
|
| +** test: never reorder trailing primaries
|
| +@ root
|
| +% reorder Zzzz Grek
|
| +* compare
|
| +<1 L
|
| +<1 字
|
| +<1 Ω
|
| +<1 \uFFFD
|
| +<1 \uFFFF
|
| +
|
| +** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
|
| +@ rules
|
| +&u=ab|cd
|
| +&v=b|ce
|
| +* compare
|
| +<1 abc
|
| +<1 abcc
|
| +<1 abcf
|
| +<1 abcd
|
| += abu
|
| +<1 abce
|
| += abv
|
| +
|
| +# With the following rules, there is only one prefix per composite ĉ or ç,
|
| +# but both prefixes apply to just c in NFD form.
|
| +# We would get different results for composed vs. NFD input
|
| +# if we fell back directly from longest-prefix mappings to no-prefix mappings.
|
| +@ rules
|
| +&x=op|ĉ
|
| +&y=p|ç
|
| +* compare
|
| +<1 opc
|
| +<2 opć
|
| +<1 opcz
|
| +<1 opd
|
| +<1 opĉ
|
| += opc\u0302
|
| += opx
|
| +<1 opç
|
| += opc\u0327
|
| += opy
|
| +
|
| +# The mapping that gets used is the one with the longest matching prefix for which
|
| +# there is also a suffix match; among several mappings for that prefix, the longest suffix match wins.
|
| +@ rules
|
| +&❶=d
|
| +&❷=de
|
| +&❸=def
|
| +&①=c|d
|
| +&②=c|de
|
| +&③=c|def
|
| +&④=bc|d
|
| +&⑤=bc|de
|
| +&⑥=bc|def
|
| +&⑦=abc|d
|
| +&⑧=abc|de
|
| +&⑨=abc|def
|
| +* compare
|
| +<1 9aadzz
|
| += 9aa❶zz
|
| +<1 9aadez
|
| += 9aa❷z
|
| +<1 9aadef
|
| += 9aa❸
|
| +<1 9acdzz
|
| += 9ac①zz
|
| +<1 9acdez
|
| += 9ac②z
|
| +<1 9acdef
|
| += 9ac③
|
| +<1 9bcdzz
|
| += 9bc④zz
|
| +<1 9bcdez
|
| += 9bc⑤z
|
| +<1 9bcdef
|
| += 9bc⑥
|
| +<1 abcdzz
|
| += abc⑦zz
|
| +<1 abcdez
|
| += abc⑧z
|
| +<1 abcdef
|
| += abc⑨
|
| +
|
| +** test: prefix + discontiguous contraction with missing prefix contraction
|
| +# Unfortunate terminology: The first "prefix" here is the pre-context,
|
| +# the second "prefix" refers to the contraction/relation string that is
|
| +# one shorter than the one being tested.
|
| +@ rules
|
| +&x=p|e
|
| +&y=p|ê
|
| +&z=op|ê
|
| +# No mapping for op|e:
|
| +# Discontiguous contraction matching should not match op|ê in opệ
|
| +# because it would have to skip the dot below and extend a match on op|e by the circumflex,
|
| +# but there is no match on op|e.
|
| +* compare
|
| +<1 oPe
|
| +<1 ope
|
| += opx
|
| +<1 opệ
|
| += opy\u0323 # y not z
|
| +<1 opê
|
| += opz
|
| +
|
| +# We cannot test for fallback by whether the contraction default CE32
|
| +# is for another contraction. With the following rules, there is no mapping for op|e,
|
| +# and the fallback to prefix p has no contractions.
|
| +@ rules
|
| +&x=p|e
|
| +&z=op|ê
|
| +* compare
|
| +<1 oPe
|
| +<1 ope
|
| += opx
|
| +<2 opệ
|
| += opx\u0323\u0302 # x not z
|
| +<1 opê
|
| += opz
|
| +
|
| +# One more variation: Fallback to the simple code point, no shorter non-empty prefix.
|
| +@ rules
|
| +&x=e
|
| +&z=op|ê
|
| +* compare
|
| +<1 ope
|
| += opx
|
| +<3 oPe
|
| += oPx
|
| +<2 opệ
|
| += opx\u0323\u0302 # x not z
|
| +<1 opê
|
| += opz
|
| +
|
| +** test: maxVariable via rules
|
| +@ rules
|
| +[maxVariable space][alternate shifted]
|
| +* compare
|
| += \u0020
|
| += \u000A
|
| +<1 .
|
| +<1 ° # degree sign
|
| +<1 $
|
| +<1 0
|
| +
|
| +** test: maxVariable via setting
|
| +@ root
|
| +% maxVariable=currency
|
| +% alternate=shifted
|
| +* compare
|
| += \u0020
|
| += \u000A
|
| += .
|
| += ° # degree sign
|
| += $
|
| +<1 0
|
| +
|
| +** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
|
| +# This tests canonical closure, but it also tests that CollationFastLatin
|
| +# bails out properly for contractions with combining marks.
|
| +# For that we need pairs of strings that remain in the Latin fastpath
|
| +# long enough, hence the extra "= b" lines.
|
| +@ rules
|
| +&b=\u00e4\u00e4
|
| +* compare
|
| +<1 b
|
| += \u00e4\u00e4
|
| += b
|
| += a\u0308a\u0308
|
| += b
|
| += \u00e4a\u0308
|
| += b
|
| += a\u0308\u00e4
|
| +
|
| +** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
|
| +@ rules
|
| +&b=\u00C5
|
| +* compare
|
| +<1 b
|
| += \u00C5
|
| += b
|
| += A\u030A
|
| += b
|
| += \u212B
|
| +
|
| +** test: reset-before on already-tailored characters, ICU ticket 10108
|
| +@ rules
|
| +&a<w<<x &[before 2]x<<y
|
| +* compare
|
| +<1 a
|
| +<1 w
|
| +<2 y
|
| +<2 x
|
| +
|
| +@ rules
|
| +&a<<w<<<x &[before 2]x<<y
|
| +* compare
|
| +<1 a
|
| +<2 y
|
| +<2 w
|
| +<3 x
|
| +
|
| +@ rules
|
| +&a<w<x &[before 2]x<<y
|
| +* compare
|
| +<1 a
|
| +<1 w
|
| +<1 y
|
| +<2 x
|
| +
|
| +@ rules
|
| +&a<w<<<x &[before 2]x<<y
|
| +* compare
|
| +<1 a
|
| +<1 y
|
| +<2 w
|
| +<3 x
|
| +
|
| +** test: numeric collation with other settings, ICU ticket 9092
|
| +@ root
|
| +% strength=identical
|
| +% caseFirst=upper
|
| +% numeric=on
|
| +* compare
|
| +<1 100\u0020a
|
| +<1 101
|
| +
|
| +** test: collation type fallback from unsupported type, ICU ticket 10149
|
| +@ locale fr-CA-u-co-phonebk
|
| +# Expect the same result as with fr-CA, using backwards-secondary order.
|
| +# That is, we should fall back from the unsupported collation type
|
| +# to the locale's default collation type.
|
| +* compare
|
| +<1 cote
|
| +<2 côte
|
| +<2 coté
|
| +<2 côté
|
| +
|
| +** test: @ is equivalent to [backwards 2], ICU ticket 9956
|
| +@ rules
|
| +&b<a @ &v<<w
|
| +* compare
|
| +<1 b
|
| +<1 a
|
| +<1 cote
|
| +<2 côte
|
| +<2 coté
|
| +<2 côté
|
| +<1 v
|
| +<2 w
|
| +<1 x
|
| +
|
| +** test: shifted+reordering, ICU ticket 9507
|
| +@ root
|
| +% reorder Grek punct space
|
| +% alternate=shifted
|
| +% strength=quaternary
|
| +# Which primaries are "variable" should be determined without script reordering,
|
| +# and then primaries should be reordered whether they are shifted to quaternary or not.
|
| +* compare
|
| +<4 ( # punctuation
|
| +<4 )
|
| +<4 \u0020 # space
|
| +<1 ` # symbol
|
| +<1 ^
|
| +<1 $ # currency symbol
|
| +<1 €
|
| +<1 0 # numbers
|
| +<1 ε # Greek
|
| +<1 e # Latin
|
| +<1 e(e
|
| +<4 e)e
|
| +<4 e\u0020e
|
| +<4 ee
|
| +<3 e(E
|
| +<4 e)E
|
| +<4 e\u0020E
|
| +<4 eE
|
| +
|
| +** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
|
| +@ rules
|
| +&\u0001<<<b<<<B
|
| +% caseFirst=upper
|
| +* compare
|
| +<1 aaa
|
| +<3 aaaB
|
| +
|
| +** test: secondary+case ignores secondary ignorables, ICU ticket 9355
|
| +@ rules
|
| +&\u0001<<<b<<<B
|
| +% strength=secondary
|
| +% caseLevel=on
|
| +* compare
|
| +<1 a
|
| += ab
|
| += aB
|
| +
|
| +** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
|
| +@ rules
|
| +&[before 2] ൌ << ൗ # U+0D57 << U+0D4C == 0D46+0D57
|
| +* compare
|
| +<1 ൗx
|
| +<2 ൌx
|
| +<1 ൗy
|
| +<2 ൌy
|
| +
|
| +** test: quoted apostrophe in compact syntax, ICU ticket 8204
|
| +@ rules
|
| +&q<<*a''c
|
| +* compare
|
| +<1 d
|
| +<1 p
|
| +<1 q
|
| +<2 a
|
| +<2 \u0027
|
| +<2 c
|
| +<1 r
|
| +
|
| +# ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
|
| +** test: locale -u- with collation keywords, ICU ticket 8260
|
| +@ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
|
| +* compare
|
| +<4 \u0020 # space is shifted, strength=quaternary
|
| +<1 ! # punctuation is regular
|
| +<1 2
|
| +<1 12 # numeric sorting
|
| +<1 B
|
| +<c b # uppercase first on case level
|
| +<1 x\u0301\u0308
|
| +<2 x\u0308\u0301 # normalization off
|
| +
|
| +** test: locale @ with collation keywords, ICU ticket 8260
|
| +@ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
|
| +* compare
|
| +<4 $ # currency symbols are shifted, strength=quaternary
|
| +<1 àla
|
| +<2 alà # backwards secondary level
|
| +
|
| +** test: locale -u- with script reordering, ICU ticket 8260
|
| +@ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
|
| +* compare
|
| +<1 \u0020
|
| +<1 あ
|
| +<1 ☂
|
| +<1 Ω
|
| +<1 丂
|
| +<1 ж
|
| +<1 L
|
| +<1 4
|
| +<1 Ձ
|
| +<1 अ
|
| +<1 ሄ
|
| +<1 ฉ
|
| +
|
| +** test: locale @collation=type should be case-insensitive
|
| +@ locale de@coLLation=PhoneBook
|
| +* compare
|
| +<1 ae
|
| +<2 ä
|
| +<3 Ä
|
| +
|
| +** test: import root search rules plus German phonebook rules, ICU ticket 8962
|
| +@ locale de-u-co-search
|
| +* compare
|
| +<1 =
|
| +<1 ≠
|
| +<1 a
|
| +<1 ae
|
| +<2 ä
|
| +
|
| +# Once more, but with runtime builder.
|
| +@ rules
|
| +[import und-u-co-search][import de-u-co-phonebk]
|
| +* compare
|
| +<1 =
|
| +<1 ≠
|
| +<1 a
|
| +<1 ae
|
| +<2 ä
|
| +
|
| +# Once again, but importing from "root" instead of "und" ("und" is what a proper language tag uses).
|
| +@ rules
|
| +[import root-u-co-search][import de-u-co-phonebk]
|
| +* compare
|
| +<1 =
|
| +<1 ≠
|
| +<1 a
|
| +<1 ae
|
| +<2 ä
|
| +
|
| +** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
|
| +# Greek should sort Greek first.
|
| +@ rules
|
| +[import el]
|
| +* compare
|
| +<1 4
|
| +<1 Ω
|
| +<1 L
|
| +
|
| +# Import Greek, and then reset the reordering.
|
| +@ rules
|
| +[import el][reorder Zzzz]
|
| +* compare
|
| +<1 4
|
| +<1 L
|
| +<1 Ω
|
| +
|
| +# "others" is a synonym for Zzzz.
|
| +@ rules
|
| +[import el][reorder others]
|
| +* compare
|
| +<1 4
|
| +<1 L
|
| +<1 Ω
|
|
|