OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * Copyright (c) 2001-2011,2015 International Business Machines | |
3 * Corporation and others. All Rights Reserved. | |
4 ******************************************************************** | |
5 * File USRCHDAT.H | |
6 * Modification History: | |
7 * Name date Description | |
8 * synwee July 31 2001 creation | |
9 ********************************************************************/ | |
10 | |
11 | |
12 /* | |
13 Note: This file is included by other C and C++ files. This file should not be di
rectly compiled. | |
14 */ | |
15 #ifndef USRCHDAT_C | |
16 #define USRCHDAT_C | |
17 | |
18 #include "unicode/ucol.h" | |
19 | |
20 #if !UCONFIG_NO_COLLATION | |
21 | |
22 /* Set to 1 if matches must be on grapheme boundaries */ | |
23 #define GRAPHEME_BOUNDARIES 1 | |
24 | |
/*
 * One search test case. The tables in this file initialize these members
 * positionally, so the member order must not change.
 * offset[] and size[] are parallel arrays of 32 slots each; in every table
 * entry visible in this file the offset list is terminated by -1.
 * NOTE(review): offset[i] and size[i] look like the expected start index and
 * length of the i-th match - confirm against the test driver.
 */
25 U_CDECL_BEGIN | |
26 struct SearchData { | |
27 const char *text; | |
28 const char *pattern; | |
29 const char *collator; /* currently supported "fr" "es" "de", p
lus NULL/other => "en" */ | |
30 UCollationStrength strength; | |
31 USearchAttributeValue elemCompare; /* value for the USEARCH_ELEMENT_CO
MPARISON attribute */ | |
32 const char *breaker; /* currently supported "wordbreaker" for
EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */ | |
33 int8_t offset[32]; | |
34 uint8_t size[32]; | |
35 }; | |
36 U_CDECL_END | |
37 | |
38 typedef struct SearchData SearchData; | |
39 | |
/*
 * Collation rule text relating o/O and p/P. It is not referenced anywhere
 * else in the visible part of this file.
 * NOTE(review): presumably the tailoring behind the COLLATOR table - confirm.
 */
40 static const char *TESTCOLLATORRULE = "& o,O ; p,P"; | |
41 | |
/*
 * Collation rule text pairing ae/oe/ue (lower and upper case) with the
 * escaped code points 00e4/00c4, 00f6/00d6 and 00fc/00dc. It is not
 * referenced anywhere else in the visible part of this file.
 */
42 static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \
\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc"; | |
43 | |
/*
 * Basic search cases: plain ASCII words first, then texts and patterns made
 * of combining marks. Two entries depend on GRAPHEME_BOUNDARIES: with it set
 * they list no match (-1 only), otherwise they list a match at offset 1.
 * The table ends with an entry whose text and pattern are NULL.
 */
44 static const SearchData BASIC[] = { | |
45 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {-1}, {0}}, | |
46 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEM
ENT_COMPARISON, NULL, {13, -1}, | |
47 {6}}, | |
48 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
49 {13, 20, -1}, {6, 6}}, | |
50 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
51 {6, 20, -1}, {6, 6}}, | |
52 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {0, 14, -1}, | |
53 {6, 6}}, | |
54 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {1, -1}, {1}}, | |
55 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {5, -1}, {1}}, | |
56 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
57 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
58 | |
59 #if GRAPHEME_BOUNDARIES | |
60 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
61 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
62 #else | |
63 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
64 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, | |
65 #endif | |
66 | |
67 {"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, | |
68 {"x a\\u0301", "a\\u0301", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {2}}, | |
69 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
70 }; | |
71 | |
/*
 * Cases that name a break iterator in the breaker field (characterbreaker or
 * wordbreaker). Several text/pattern pairs appear once with each breaker and
 * list different offsets. One entry is disabled with #if 0.
 * The table ends with an entry whose text and pattern are NULL.
 */
72 static const SearchData BREAKITERATOREXACT[] = { | |
73 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "characterbreaker", {0, 5, -1}, | |
74 {3, 3}}, | |
75 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "wordbreaker", {5, -1}, {3}}, | |
76 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, | |
77 "characterbreaker", {10, 14, -1}, {3, 2}}, | |
78 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", | |
79 {10, -1}, {3}}, | |
80 {"Channel, another channel, more channels, and one last Channel", | |
81 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbr
eaker", {0, 54, -1}, {7, 7}}, | |
82 /* jitterbug 1745 */ | |
83 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, | |
84 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, | |
85 {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
86 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1,
28, 41, -1}, {1, 1, 1}}, | |
87 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "
characterbreaker", {0, -1}, {1}}, | |
88 #if 0 | |
89 /* Problem reported by Dave Bertoni, same as ticket 4279? */ | |
90 {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, USEA
RCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, -1}, {2}}, | |
91 #endif | |
92 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
93 }; | |
94 | |
/*
 * Shared text for STRENGTH entries 06 through 43: ten phrases, joined by
 * comma-space, each containing a differently accented spelling of peche.
 * The macro is assembled from adjacent string literals.
 */
95 #define PECHE_WITH_ACCENTS "un p\\u00E9ch\\u00E9, " \ | |
96 "\\u00E7a p\\u00E8che par, " \ | |
97 "p\\u00E9cher, " \ | |
98 "une p\\u00EAche, " \ | |
99 "un p\\u00EAcher, " \ | |
100 "j\\u2019ai p\\u00EAch\\u00E9, " \ | |
101 "un p\\u00E9cheur, " \ | |
102 "\\u201Cp\\u00E9che\\u201D, " \ | |
103 "decomp peche\\u0301, " \ | |
104 "base peche" | |
105 /* in the above, the interesting words and their offsets are: | |
106 3 pe<301>che<301> | |
107 13 pe<300>che | |
108 24 pe<301>cher | |
109 36 pe<302>che | |
110 46 pe<302>cher | |
111 59 pe<302>che<301> | |
112 69 pe<301>cheur | |
113 79 pe<301>che | |
114 94 peche<+301> | |
115 107 peche | |
116 */ | |
117 | |
/*
 * Cases that vary the collation strength and the element-comparison value.
 * Entries carry a two-digit index in a leading comment. Entries 06-24 search
 * PECHE_WITH_ACCENTS with collator en; entries 25-43 list the same patterns,
 * settings and offsets with collator fr. One entry is disabled with #if 0.
 * The table ends with an entry whose text and pattern are NULL.
 */
118 static const SearchData STRENGTH[] = { | |
119 /*012345678901234567890123456789012345678901234567890123456789*/ | |
120 /*00*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
121 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}
, {3, 3}}, | |
122 /*01*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
123 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16
, -1}, {3}}, | |
124 /*02*/{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAch
er Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat
toe big Toe", | |
125 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NUL
L, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, | |
126 /*03*/{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
127 {10, 14, -1}, {3, 2}}, | |
128 /*04*/{"A channel, another CHANNEL, more Channels, and one last channel...", | |
129 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}}, | |
130 /*05*/{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, U
SEARCH_STANDARD_ELEMENT_COMPARISON, | |
131 NULL, {0, -1}, {1, 0}}, | |
132 /* some tests for modified element comparison, ticket #7093 */ | |
133 /*06*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5
, 5, 5, 5, 5, 6, 5}}, | |
134 /*07*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5
, 6, 5}}, | |
135 /*08*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {107, -1}, {5}}, | |
136 /*09*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
137 /*10*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5
, 5, 5, 6, 5}}, | |
138 /*11*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, | |
139 /*12*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, | |
140 /*13*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, | |
141 /*14*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, | |
142 /*15*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}
, | |
143 /*16*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, | |
144 /*17*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
145 /*18*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
146 /*19*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5,
5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
147 /*20*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5,
5, 5, 5, 6, 5}}, | |
148 /*21*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
149 /*22*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
150 /*23*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5
, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
151 /*24*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 6, 5}}, | |
152 /* more tests for modified element comparison (with fr), ticket #7093
*/ | |
153 /*25*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5
, 5, 5, 5, 5, 6, 5}}, | |
154 /*26*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5
, 6, 5}}, | |
155 /*27*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {107, -1}, {5}}, | |
156 /*28*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
157 /*29*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BA
SE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5
, 5, 5, 6, 5}}, | |
158 /*30*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, | |
159 /*31*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, | |
160 /*32*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, | |
161 /*33*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, | |
162 /*34*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}
, | |
163 /*35*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, | |
164 /*36*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
165 /*37*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATT
ERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
166 /*38*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5,
5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
167 /*39*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_
BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5,
5, 5, 5, 6, 5}}, | |
168 /*40*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
169 /*41*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PAT
TERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
170 /*42*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5
, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
171 /*43*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY
_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5
, 5, 5, 5, 6, 5}}, | |
172 | |
173 #if 0 | |
174 /* Ticket 5382 */ | |
175 {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {2}}, | |
176 #endif | |
177 | |
178 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
179 }; | |
180 | |
/*
 * Cases whose text or pattern contains spaces and hyphens; the inline
 * comments name the individual scenarios (leading ignorable, tightest match,
 * totally ignorable text).
 * NOTE(review): the table name suggests these exercise variable collation
 * elements - confirm against the test that consumes this table.
 * The table ends with an entry whose text and pattern are NULL.
 */
181 static const SearchData VARIABLE[] = { | |
182 /*012345678901234567890123456789012345678901234567890123456789*/ | |
183 {"blackbirds black blackbirds blackbird black-bird", | |
184 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, 17, 28, 38, -1}, | |
185 {9, 9, 9, 10}}, | |
186 /* to see that it doesn't go into an infinite loop if the start of text | |
187 is an ignorable character */ | |
188 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL
, {-1}, {0}}, | |
189 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, | |
190 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |
191 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
193 /* testing tightest match */ | |
194 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEAR
CH_STANDARD_ELEMENT_COMPARISON, | |
195 NULL, {1, -1}, {3}}, | |
196 /*012345678901234567890123456789012345678901234567890123456789 */ | |
197 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARC
H_STANDARD_ELEMENT_COMPARISON, | |
198 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, | |
199 /* totally ignorable text */ | |
200 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, | |
201 NULL, {-1}, {0}}, | |
202 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
203 }; | |
204 | |
/*
 * Cases where the pattern lists the two combining marks 0325 and 0300 in the
 * opposite order from the text. The second case depends on
 * GRAPHEME_BOUNDARIES: no match when set, a match at offset 1 otherwise.
 * NOTE(review): the name suggests these run with normalization enabled -
 * confirm against the test driver.
 */
205 static const SearchData NORMEXACT[] = { | |
206 {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, | |
207 | |
208 #if GRAPHEME_BOUNDARIES | |
209 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
210 #else | |
211 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
212 #endif | |
213 | |
214 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
215 }; | |
216 | |
/*
 * Single case with the same text and pattern as the second NORMEXACT case;
 * here no match is listed regardless of GRAPHEME_BOUNDARIES.
 * NOTE(review): the name suggests this runs with normalization disabled -
 * confirm against the test driver.
 */
217 static const SearchData NONNORMEXACT[] = { | |
218 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
219 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
220 }; | |
221 | |
/*
 * Pattern abab in text abababab with overlapping occurrences listed:
 * offsets 0, 2 and 4, each of length 4.
 */
222 static const SearchData OVERLAP[] = { | |
223 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 2, 4, -1}, | |
224 {4, 4, 4}}, | |
225 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
226 }; | |
227 | |
/*
 * Same text and pattern as OVERLAP, but only the non-overlapping
 * occurrences are listed: offsets 0 and 4, each of length 4.
 */
228 static const SearchData NONOVERLAP[] = { | |
229 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 4, -1}, {4, 4}}, | |
230 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
231 }; | |
232 | |
/*
 * Pattern fox in text fox fpx, listed twice: the entry labelled english
 * lists only offset 0, the entry labelled tailored (primary strength) lists
 * offsets 0 and 4.
 * NOTE(review): the tailored entry presumably relies on a collator built
 * from TESTCOLLATORRULE - confirm against the test driver.
 */
233 static const SearchData COLLATOR[] = { | |
234 /* english */ | |
235 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {3}}, | |
236 /* tailored */ | |
237 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 4, -1}, {3, 3}}, | |
238 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
239 }; | |
240 | |
/*
 * Two cases over the same text with different patterns (the, then fox),
 * both at primary strength.
 * The table ends with an entry whose text and pattern are NULL.
 */
241 static const SearchData PATTERN[] = { | |
242 {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
243 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3
, 3}}, | |
244 {"The quick brown fox jumps over the lazy foxes", "fox", NULL, | |
245 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, | |
246 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
247 }; | |
248 | |
/*
 * Two cases with the same pattern (fox) over different texts, both at
 * tertiary strength.
 * The table ends with an entry whose text and pattern are NULL.
 */
249 static const SearchData TEXT[] = { | |
250 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {4, 15, -1}, | |
251 {3, 3}}, | |
252 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {16, -1}, | |
253 {3}}, | |
254 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
255 }; | |
256 | |
/*
 * Cases where the text contains single composite code points (00C0, 01FA,
 * 0F73, 00DF, 00E1) and the pattern is a base letter, one or more combining
 * marks, or a longer spelling. Two groups depend on GRAPHEME_BOUNDARIES:
 * with it set, fewer or no matches are listed than in the #else branch.
 * The table ends with an entry whose text and pattern are NULL.
 */
257 static const SearchData COMPOSITEBOUNDARIES[] = { | |
258 #if GRAPHEME_BOUNDARIES | |
259 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {-1}, {0}}, | |
260 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
261 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
262 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
263 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
264 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
265 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
266 #else | |
267 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, | |
268 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
269 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
270 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
271 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
272 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
273 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, 1, -1}, | |
274 {1, 1}}, | |
275 #endif | |
276 | |
277 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
278 /* A + 030A + 0301 */ | |
279 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
280 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {1}}, | |
281 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
282 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
283 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
284 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
285 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
286 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
287 | |
288 #if GRAPHEME_BOUNDARIES | |
289 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
290 #else | |
291 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
292 #endif | |
293 | |
294 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
295 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
296 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
297 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
298 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
299 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
300 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
301 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
302 | |
303 /* Ticket 5024 */ | |
304 {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {0, -1}, {2}}, | |
305 | |
306 /* Ticket 5420 */ | |
307 {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {3}}, | |
308 {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {3}}, | |
309 {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {1, -1}, {2}}, | |
310 | |
311 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
312 }; | |
313 | |
/*
 * Pattern bee searched in two texts; the second text extends the first with
 * one more occurrence, listed at offset 40.
 * The table ends with an entry whose text and pattern are NULL.
 */
314 static const SearchData MATCH[] = { | |
315 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, | |
316 {7, 26, -1}, {3, 3}}, | |
317 /* 012345678901234567890123456789012345678901234567890 */ | |
318 {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
319 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {
3, 3, 3}}, | |
320 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
321 }; | |
322 | |
/*
 * Cases whose text and pattern contain surrogate pairs written as escaped
 * code units (D800 DC00, D834 DDB9). The first text also contains a lone
 * lead surrogate before a pair and a lone trail surrogate after a pair.
 * Listed sizes count each surrogate pair as 2.
 * The table ends with an entry whose text and pattern are NULL.
 */
323 static const SearchData SUPPLEMENTARY[] = { | |
324 /* 0123456789012345678901234567890123456789012345678900123456789012345678901
23456789012345678901234567890012345678901234567890123456789 */ | |
325 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD
800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
326 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {4, 13, 22, 26, 29, -1}, | |
327 {2, 2, 2, 2, 2}}, | |
328 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, | |
329 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, | |
330 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, | |
331 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
332 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, | |
333 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
334 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, | |
335 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
336 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, | |
337 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
338 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
339 }; | |
340 | |
/*
 * Collation rule text defining multi-character sequences (ab/c, AB, ABC,
 * X + 0300, X + 0300 + 0315) relative to z.
 * NOTE(review): presumably the tailoring used with the CONTRACTION table -
 * confirm against the test driver.
 */
341 static const char *CONTRACTIONRULE = | |
342 "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315"; | |
343 | |
/*
 * Cases built from the letters and combining marks that appear in
 * CONTRACTIONRULE; the inline comments name each scenario. Three groups
 * depend on GRAPHEME_BOUNDARIES: with it set no match is listed, otherwise
 * a match is listed.
 * The table ends with an entry whose text and pattern are NULL.
 */
344 static const SearchData CONTRACTION[] = { | |
345 /* common discontiguous */ | |
346 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
347 | |
348 #if GRAPHEME_BOUNDARIES | |
349 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
350 #else | |
351 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
352 #endif | |
353 | |
354 /* contraction prefix */ | |
355 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, NULL, {-1}, {0}}, | |
356 | |
357 #if GRAPHEME_BOUNDARIES | |
358 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {-1}, {0}}, | |
359 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {-1}, {0}}, | |
360 #else | |
361 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, -1}, {2}}, | |
362 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {1}}, | |
363 #endif | |
364 | |
365 /* discontiguous problem here for backwards iteration. | |
366 accents not found because discontiguous stores all information */ | |
367 {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, | |
368 {0}}, | |
369 /* ends not with a contraction character */ | |
370 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {-1}, | |
371 {0}}, | |
372 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, | |
373 {0, -1}, {3}}, | |
374 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {-1}, | |
375 {0}}, | |
376 /* blocked discontiguous */ | |
377 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, | |
378 {-1}, {0}}, | |
379 | |
380 #if GRAPHEME_BOUNDARIES | |
381 /* | |
382 * "ab" generates a contraction that's an expansion. The "z" matches the | |
383 * first CE of the expansion but the match fails because it ends in the | |
384 * middle of an expansion... | |
385 */ | |
386 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}}, | |
387 #else | |
388 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, -1}, {2}}, | |
389 #endif | |
390 | |
391 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
392 }; | |
393 | |
/*
 * Collation rule text equating a with the combining mark 0300.
 * NOTE(review): presumably the tailoring used with the IGNORABLE table -
 * confirm against the test driver.
 */
394 static const char *IGNORABLERULE = "&a = \\u0300"; | |
395 | |
/*
 * Single case at primary strength: pattern 0300 in a text holding the
 * sequence 0300 0315 twice. With GRAPHEME_BOUNDARIES set only the occurrence
 * at offset 0 is listed; otherwise offsets 0 and 3 are listed.
 * The table ends with an entry whose text and pattern are NULL.
 */
396 static const SearchData IGNORABLE[] = { | |
397 #if GRAPHEME_BOUNDARIES | |
398 /* | |
399 * This isn't much of a test when matches have to be on | |
400 * grapheme boundaries. The match at 0 only works because | |
401 * it's at the start of the text. | |
402 */ | |
403 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, | |
404 {0, -1}, {2}}, | |
405 #else | |
406 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, | |
407 {0, 3, -1}, {2, 2}}, | |
408 #endif | |
409 | |
410 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
411 }; | |
412 | |
/*
 * Starts with the same seven ASCII cases as BASIC, then lists combining-mark
 * cases in two variants selected by GRAPHEME_BOUNDARIES: with it set none of
 * them lists a match, otherwise each lists at least one. Two further
 * combining-mark cases follow unconditionally.
 * NOTE(review): the name suggests these run with canonical matching enabled
 * - confirm against the test driver.
 * The table ends with an entry whose text and pattern are NULL.
 */
413 static const SearchData BASICCANONICAL[] = { | |
414 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {-1}, {0}}, | |
415 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEM
ENT_COMPARISON, NULL, {13, -1}, | |
416 {6}}, | |
417 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
418 {13, 20, -1}, {6, 6}}, | |
419 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, | |
420 {6, 20, -1}, {6, 6}}, | |
421 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, {0, 14, -1}, | |
422 {6, 6}}, | |
423 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {1, -1}, {1}}, | |
424 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {5, -1}, {1}}, | |
425 | |
426 #if GRAPHEME_BOUNDARIES | |
427 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
428 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
429 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
430 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
431 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEME
NT_COMPARISON, NULL, {-1}, {0}}, | |
432 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
433 NULL, {-1}, {0}}, | |
434 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
435 NULL, {-1}, {0}}, | |
436 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
437 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
438 #else | |
439 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
440 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
441 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, | |
442 {2}}, | |
443 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, | |
444 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEME
NT_COMPARISON, NULL, {1, -1}, {3}}, | |
445 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
446 NULL, {0, -1}, {5}}, | |
447 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, US
EARCH_STANDARD_ELEMENT_COMPARISON, | |
448 NULL, {0, -1}, {5}}, | |
449 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
450 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, 12, -1}, {5, 3}}, | |
451 #endif | |
452 | |
453 {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
454 {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
455 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
456 }; | |
457 | |
458 | |
/*
 * Six combining-mark cases over the marks 0300 and 0325, listed in two
 * variants selected by GRAPHEME_BOUNDARIES: with it set none lists a match,
 * otherwise each lists one match of size 2.
 * The table ends with an entry whose text and pattern are NULL.
 */
459 static const SearchData NORMCANONICAL[] = { | |
460 #if GRAPHEME_BOUNDARIES | |
461 /* | |
462 * These tests don't really mean anything. With matches restricted to graphe
me | |
463 * boundaries, isCanonicalMatch doesn't mean anything unless normalization i
s | |
464 * also turned on... | |
465 */ | |
466 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
467 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
468 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
469 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
470 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
471 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
472 #else | |
473 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
474 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
475 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, | |
476 {2}}, | |
477 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, | |
478 {2}}, | |
479 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
480 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
481 #endif | |
482 | |
483 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
484 }; | |
485 | |
/*
 * BREAKITERATORCANONICAL: rows that set the breaker field to either
 * "characterbreaker" or "wordbreaker". The "foxy fox" and "This is a toe ..."
 * texts each appear once per breaker value, with different offset lists.
 * The table ends with an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
486 static const SearchData BREAKITERATORCANONICAL[] = { | |
487 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "characterbreaker", {0, 5, -1}, | |
488 {3, 3}}, | |
489 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, "wordbreaker", {5, -1}, {3}}, | |
490 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, | |
491 "characterbreaker", {10, 14, -1}, {3, 2}}, | |
492 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, "wordbreaker", | |
493 {10, -1}, {3}}, | |
494 {"Channel, another channel, more channels, and one last Channel", | |
495 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbr
eaker", {0, 54, -1}, {7, 7}}, | |
496 /* jitterbug 1745 */ | |
497 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, | |
498 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, | |
499 {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
500 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1,
28, 41, -1}, {1, 1, 1}}, | |
501 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "
characterbreaker", {0, -1}, {1}}, | |
502 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
503 }; | |
504 | |
/*
 * STRENGTHCANONICAL: every data row uses UCOL_PRIMARY strength together with
 * an explicit collator name ("en", "fr", "de" or "es"); one row additionally
 * sets breaker to "wordbreaker". The table ends with an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
505 static const SearchData STRENGTHCANONICAL[] = { | |
506 /*012345678901234567890123456789012345678901234567890123456789 */ | |
507 {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
508 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, | |
509 {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
510 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1},
{3}}, | |
511 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod
T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe b
ig Toe", | |
512 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15,
21, 27, 34, -1}, {5, 5, 5, 5}}, | |
513 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELE
MENT_COMPARISON, NULL, | |
514 {10, 14, -1}, {3, 2}}, | |
515 {"A channel, another CHANNEL, more Channels, and one last channel...", | |
516 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2
, 19, 33, 56, -1}, | |
517 {7, 7, 7, 7}}, | |
518 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
519 }; | |
520 | |
/*
 * VARIABLECANONICAL: rows whose text or pattern contains spaces or hyphens,
 * at strengths ranging from UCOL_PRIMARY to UCOL_QUATERNARY. The table ends
 * with an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 * NOTE(review): the " abc a bc ..." text literals as they appear here look
 * shorter than the largest offset listed for them (31), so runs of spaces may
 * have been collapsed in this listing -- verify against the canonical file
 * before relying on these literals.
 */
521 static const SearchData VARIABLECANONICAL[] = { | |
522 /*012345678901234567890123456789012345678901234567890123456789 */ | |
523 {"blackbirds black blackbirds blackbird black-bird", | |
524 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, 17, 28, 38, -1}, | |
525 {9, 9, 9, 10}}, | |
526 /* to see that it doesn't go into an infinite loop if the start of text | |
527 is an ignorable character */ | |
528 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL
, {-1}, {0}}, | |
529 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_E
LEMENT_COMPARISON, NULL, | |
530 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |
531 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
533 /* testing tightest match */ | |
534 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEAR
CH_STANDARD_ELEMENT_COMPARISON, | |
535 NULL, {1, -1}, {3}}, | |
536 /*012345678901234567890123456789012345678901234567890123456789 */ | |
537 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARC
H_STANDARD_ELEMENT_COMPARISON, | |
538 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, | |
539 /* totally ignorable text */ | |
540 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, | |
541 NULL, {-1}, {0}}, | |
542 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
543 }; | |
544 | |
/*
 * OVERLAPCANONICAL: a single row searching "abab" in "abababab" with offsets
 * {0, 2, 4} and sizes {4, 4, 4}, followed by an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths (so the listed ranges overlap) --
 * confirm against the test driver.
 */
545 static const SearchData OVERLAPCANONICAL[] = { | |
546 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 2, 4, -1}, | |
547 {4, 4, 4}}, | |
548 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
549 }; | |
550 | |
/*
 * NONOVERLAPCANONICAL: a single row searching "abab" in "abababab" with
 * offsets {0, 4} and sizes {4, 4}, followed by an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
551 static const SearchData NONOVERLAPCANONICAL[] = { | |
552 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, 4, -1}, {4, 4}}, | |
553 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
554 }; | |
555 | |
/*
 * COLLATORCANONICAL: two rows searching "fox" in "fox fpx". The first uses
 * UCOL_TERTIARY and lists offset 0 only; the second uses UCOL_PRIMARY and
 * lists offsets 0 and 4. The table ends with an all-NULL sentinel row.
 * NOTE(review): both rows pass NULL for the collator field, so the "tailored"
 * row differs from the "english" row only in strength here; the tailoring is
 * presumably installed by the test driver -- confirm.
 */
556 static const SearchData COLLATORCANONICAL[] = { | |
557 /* english */ | |
558 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {3}}, | |
559 /* tailored */ | |
560 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 4, -1}, {3, 3}}, | |
561 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
562 }; | |
563 | |
/*
 * PATTERNCANONICAL: two rows over the same text, one with pattern "the" and
 * one with pattern "fox", both at UCOL_PRIMARY strength. The table ends with
 * an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
564 static const SearchData PATTERNCANONICAL[] = { | |
565 {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
566 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3
, 3}}, | |
567 {"The quick brown fox jumps over the lazy foxes", "fox", NULL, | |
568 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {
3, 3}}, | |
569 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
570 }; | |
571 | |
/*
 * TEXTCANONICAL: two rows with the same pattern "fox" and different texts,
 * both at UCOL_TERTIARY strength. The table ends with an all-NULL sentinel
 * row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
572 static const SearchData TEXTCANONICAL[] = { | |
573 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {4, 15, -1}, | |
574 {3, 3}}, | |
575 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {16, -1}, | |
576 {3}}, | |
577 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
578 }; | |
579 | |
/*
 * COMPOSITEBOUNDARIESCANONICAL: rows whose texts contain U+00C0, U+01FA
 * (annotated in the table as A + 030A + 0301) or U+0F73, searched with
 * patterns such as "A", U+0300, U+030A, U+0301, U+0F71 and U+0F72.
 * Each #if GRAPHEME_BOUNDARIES / #else section supplies alternative offset
 * and size lists for the same text/pattern pairs; rows outside those sections
 * are shared by both configurations. The table ends with an all-NULL
 * sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
580 static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { | |
581 #if GRAPHEME_BOUNDARIES | |
582 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {-1}, {0}}, | |
583 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
584 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
585 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
586 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {-1}, {0}}, | |
587 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
588 | |
589 /* first one matches only because it's at the start of the text */ | |
590 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
591 | |
592 /* \\u0300 blocked by \\u0300 */ | |
593 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
594 #else | |
595 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, N
ULL, {0, -1}, {1}}, | |
596 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
597 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, 1, -1}, {1, 1}}, | |
598 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {1, -1}, {1}}, | |
599 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {0, -1}, {1}}, | |
600 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
601 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, 1, -1}, | |
602 {1, 1}}, | |
603 /* \\u0300 blocked by \\u0300 */ | |
604 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {2}}, | |
605 #endif | |
606 | |
607 /* A + 030A + 0301 */ | |
608 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
609 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {0, -1}, {1}}, | |
610 | |
611 #if GRAPHEME_BOUNDARIES | |
612 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
613 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
614 #else | |
615 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
616 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {0, -1}, {1}}, | |
617 #endif | |
618 | |
619 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
620 | |
621 #if GRAPHEME_BOUNDARIES | |
622 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
623 #else | |
624 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
625 #endif | |
626 | |
627 /* blocked accent */ | |
628 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
629 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPAR
ISON, NULL, {-1}, {0}}, | |
630 | |
631 #if GRAPHEME_BOUNDARIES | |
632 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {-1}, {0}}, | |
633 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
634 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
635 #else | |
636 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
637 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {1, -1}, {1}}, | |
638 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, | |
639 #endif | |
640 | |
641 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
642 | |
643 #if GRAPHEME_BOUNDARIES | |
644 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
645 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {-1}, {0}}, | |
646 #else | |
647 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
648 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARI
SON, NULL, {0, -1}, {1}}, | |
649 #endif | |
650 | |
651 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_
COMPARISON, NULL, {0, -1}, {1}}, | |
652 | |
653 #if GRAPHEME_BOUNDARIES | |
654 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
655 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {-1}, {0}}, | |
656 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", | |
657 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, -1},
{2}}, | |
658 #else | |
659 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, | |
660 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, -1}, {2}}, | |
661 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", | |
662 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 6, 10, 1
3, -1}, {1, 3, 2, 1}}, | |
663 #endif | |
664 | |
665 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
666 }; | |
667 | |
/*
 * MATCHCANONICAL: two rows searching "bee" at UCOL_TERTIARY strength; the
 * second text extends the first and lists one additional offset (40). The
 * table ends with an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
668 static const SearchData MATCHCANONICAL[] = { | |
669 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STAN
DARD_ELEMENT_COMPARISON, NULL, | |
670 {7, 26, -1}, {3, 3}}, | |
671 /*012345678901234567890123456789012345678901234567890 */ | |
672 {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
673 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {
3, 3, 3}}, | |
674 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
675 }; | |
676 | |
/*
 * SUPPLEMENTARYCANONICAL: rows whose text and pattern contain escape pairs in
 * the U+D800..U+DFFF range (U+D800 U+DC00, U+D834 U+DDB9, ...), all at
 * UCOL_TERTIARY strength. The first text also contains sequences with an
 * extra leading U+D800 or trailing U+DC00. The table ends with an all-NULL
 * sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths in UTF-16 code units -- confirm against
 * the test driver.
 */
677 static const SearchData SUPPLEMENTARYCANONICAL[] = { | |
678 /*01234567890123456789012345678901234567890123456789001234567890123456789012
3456789012345678901234567890012345678901234567890123456789 */ | |
679 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD
800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
680 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
NULL, {4, 13, 22, 26, 29, -1}, | |
681 {2, 2, 2, 2, 2}}, | |
682 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, | |
683 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, | |
684 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, | |
685 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
686 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, | |
687 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
688 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, | |
689 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
690 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, | |
691 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, | |
692 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
693 }; | |
694 | |
/*
 * CONTRACTIONCANONICAL: rows combining letters (A, B, C, D, X) with the marks
 * U+0300, U+0315 and U+031A, plus an "ab"/"z" row; the inline comments label
 * them as contraction and discontiguous-contraction cases. Each
 * #if GRAPHEME_BOUNDARIES / #else section supplies alternative offset and
 * size lists for the same text/pattern pairs. The table ends with an all-NULL
 * sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
695 static const SearchData CONTRACTIONCANONICAL[] = { | |
696 /* common discontiguous */ | |
697 #if GRAPHEME_BOUNDARIES | |
698 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {-1}, {0}}, | |
699 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
700 #else | |
701 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT
_COMPARISON, NULL, {1, -1}, {2}}, | |
702 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_
ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
703 #endif | |
704 | |
705 /* contraction prefix */ | |
706 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON
, NULL, {-1}, {0}}, | |
707 | |
708 #if GRAPHEME_BOUNDARIES | |
709 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {-1}, {0}}, | |
710 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {-1}, {0}}, | |
711 #else | |
712 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISO
N, NULL, {0, -1}, {2}}, | |
713 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMP
ARISON, NULL, {2, -1}, {1}}, | |
714 #endif | |
715 | |
716 /* discontiguous problem here for backwards iteration. | |
717 forwards gives 0, 4 but backwards gives 1, 3 */ | |
718 /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDA
RD_ELEMENT_COMPARISON, NULL, {0, -1}, | |
719 {4}}, */ | |
720 | |
721 /* ends not with a contraction character */ | |
722 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD
_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
723 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, | |
724 | |
725 #if GRAPHEME_BOUNDARIES | |
726 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
727 | |
728 /* blocked discontiguous */ | |
729 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
730 | |
731 /* | |
732 * "ab" generates a contraction that's an expansion. The "z" matches the | |
733 * first CE of the expansion but the match fails because it ends in the | |
734 * middle of an expansion... | |
735 */ | |
/* NOTE(review): this row pairs offset list {-1} with size list {2}, whereas
   the other {-1} rows in this table use {0} -- confirm this is intentional. */
736 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {2}}, | |
737 #else | |
738 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {0, -1}, {4}}, | |
739 | |
740 /* blocked discontiguous */ | |
741 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_
STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {4}}, | |
742 | |
743 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{0, -1}, {2}}, | |
744 #endif | |
745 | |
746 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
747 }; | |
748 | |
/*
 * DIACRITICMATCH: three rows at UCOL_PRIMARY or UCOL_SECONDARY strength whose
 * texts and patterns are written entirely as Unicode escapes. The table ends
 * with an all-NULL sentinel row.
 * NOTE(review): the third row passes "LDE_AN_CX_EX_FX_HX_NX_S1" in the
 * collator field, which is not one of the names listed in the SearchData
 * field comment ("fr", "es", "de", NULL/other => "en") -- confirm how the
 * test driver interprets it.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
749 static const SearchData DIACRITICMATCH[] = { | |
750 {"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1
\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-
1}, {4, 3}}, | |
751 {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_ST
ANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
752 {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u0
0C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020
", | |
753 "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDAR
D_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3,
2, 1, 3, 2}}, | |
754 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
755 }; | |
756 | |
/*
 * INDICPREFIXMATCH: three rows at UCOL_PRIMARY strength whose texts are
 * sequences of U+09xx code points separated by U+0020, searched with the
 * patterns U+0915, U+0915 U+0924 and U+0915 U+0943 U+0924. The table ends
 * with an all-NULL sentinel row.
 * NOTE(review): offset[] presumably holds expected match starts terminated by
 * -1 and size[] the matching lengths -- confirm against the test driver.
 */
757 static const SearchData INDICPREFIXMATCH[] = { | |
758 {"\\u0915\\u0020\\u0915\\u0901\\u0020\\u0915\\u0902\\u0020\\u0915\\u0903\\u0
020\\u0915\\u0940\\u0020\\u0915\\u093F\\u0020\\u0915\\u0943\\u0020\\u0915\\u093C
\\u0020\\u0958", | |
759 "\\u0915", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, 2, 5, 8, 11, 14, 17, 20, 23,-1}, {1, 2, 2, 2, 1, 1, 1, 2, 1}}, | |
760 {"\\u0915\\u0924\\u0020\\u0915\\u0924\\u0940\\u0020\\u0915\\u0924\\u093F\\u0
020\\u0915\\u0924\\u0947\\u0020\\u0915\\u0943\\u0924\\u0020\\u0915\\u0943\\u0924
\\u0947", | |
761 "\\u0915\\u0924", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {0, 3, 7, 11, -1}, {2, 2, 2, 2}}, | |
762 {"\\u0915\\u0924\\u0020\\u0915\\u0924\\u0940\\u0020\\u0915\\u0924\\u093F\\u0
020\\u0915\\u0924\\u0947\\u0020\\u0915\\u0943\\u0924\\u0020\\u0915\\u0943\\u0924
\\u0947", | |
763 "\\u0915\\u0943\\u0924", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPA
RISON, NULL, {15, 19, -1}, {3, 3}}, | |
764 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
{-1}, {0}} | |
765 }; | |
766 | |
767 #endif /* #if !UCONFIG_NO_COLLATION */ | |
768 | |
769 #endif | |
OLD | NEW |