OLD | NEW |
| (Empty) |
1 <?xml version="1.0" encoding="UTF-8"?> | |
2 | |
3 <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved --> | |
4 | |
5 <!-- Test data file for string search --> | |
6 <!DOCTYPE stringsearch-tests [ | |
7 <!ELEMENT stringsearch-tests (test-case+)> | |
8 <!ATTLIST stringsearch-tests debug IDREF #IMPLIED > | |
9 <!ELEMENT test-case (pattern, pre?, m?, post?)> | |
10 <!ATTLIST test-case | |
11 id ID #REQUIRED | |
12 locale CDATA "en" | |
13 strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TE
RTIARY" | |
14 norm (ON | OFF) "OFF" | |
15 alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE" | |
16 > | |
17 | |
18 <!ELEMENT pattern (#PCDATA)> | |
19 <!ELEMENT pre (#PCDATA)> | |
20 <!ELEMENT m (#PCDATA)> | |
21 <!ELEMENT post (#PCDATA)> | |
22 ]> | |
23 | |
24 <stringsearch-tests> | |
25 <!-- debug="test11" (for copying into the above element) --> | |
26 | |
27 <!-- Very simple match --> | |
28 <test-case id="test01" > | |
29 <pattern>abc</pattern> | |
30 <pre>xxx</pre><m>abc</m><post>yyy</post> | |
31 </test-case> | |
32 | |
33 <!-- Very simple no-match --> | |
34 <test-case id="test02" > | |
35 <pattern>abc</pattern> | |
36 <pre>xxx</pre><post>yyy</post> | |
37 </test-case> | |
38 | |
39 <!-- Match after several near-misses. --> | |
40 <test-case id="test03" > | |
41 <pattern>string</pattern> | |
42 <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><pos
t> fling</post> | |
43 </test-case> | |
44 | |
45 <test-case id="test04" strength="PRIMARY" > | |
46 <pattern>FUSS</pattern> | |
47 <pre>abc</pre><m>fuss</m><post>sss</post> | |
48 </test-case> | |
49 | |
50 <test-case id="test05" strength="PRIMARY" > | |
51 <pattern>FUSS</pattern> | |
52 <pre>abc</pre><m>fuß</m><post>sss</post> | |
53 </test-case> | |
54 | |
55 <test-case id="test05.5" strength="PRIMARY" > | |
56 <pattern>fuss</pattern> | |
57 <pre>a </pre> | |
58 <m>fuß</m> | |
59 <post>ball table</post> | |
60 </test-case> | |
61 | |
62 <test-case id="test06" strength="PRIMARY" > | |
63 <pattern>fuß</pattern> | |
64 <pre>abc</pre><m>fuss</m><post>xyz</post> | |
65 </test-case> | |
66 | |
67 <test-case id="test07" strength="SECONDARY" > | |
68 <pattern>fuß</pattern> | |
69 <pre>abcfussxyz</pre> | |
70 </test-case> | |
71 | |
72 <test-case id="test08" strength="PRIMARY" > | |
73 <pattern>fus</pattern> | |
74 <pre>abcfuß</pre><post>xyz</post> | |
75 </test-case> | |
76 | |
77 <!-- A good match following an initial candidate match that was rejected | |
78 because it did not end on a character boundary --> | |
79 <test-case id="test09" strength="PRIMARY"> | |
80 <pattern>fus</pattern> | |
81 <pre>fuß </pre><m>fus</m><post>sss</post> | |
82 </test-case> | |
83 | |
84 | |
85 <!-- Test cases from usrchdat.c BREAKITERATOREXACT --> | |
86 | |
87 <test-case id="test10" strength="TERTIARY"> | |
88 <pattern>fox</pattern> | |
89 <m>fox</m><post>y fox</post> | |
90 </test-case> | |
91 | |
92 <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook"
> | |
93 <pattern>toe</pattern> | |
94 <pre>This is a </pre><m>Tö</m><post>ne</post> | |
95 </test-case> | |
96 | |
97 <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebo
ok"> | |
98 <pattern>toe</pattern> | |
99 <pre>This is a </pre><post>Töne</post> | |
100 </test-case> | |
101 | |
102 <test-case id="test12" strength="TERTIARY"> | |
103 <pattern>e</pattern> | |
104 <pre>tésting that é doés not match </pre><m>e</m><post></post> | |
105 </test-case> | |
106 | |
107 <test-case id="test13" strength="PRIMARY" locale="fr"> | |
108 <pattern>e</pattern> | |
109 <pre></pre><m>É</m><post>É</post> | |
110 </test-case> | |
111 | |
112 <test-case id="test14" strength="PRIMARY" locale="fr"> | |
113 <pattern>O</pattern> | |
114 <pre>C</pre><m>O\u0302</m><post>TÉ</post> | |
115 </test-case> | |
116 | |
117 | |
118 <!-- Test cases from usrchdat.c STRENGTH --> | |
119 | |
120 | |
121 <test-case id="test15" strength="PRIMARY" locale="en"> | |
122 <pattern>fox</pattern> | |
123 <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</pos
t> | |
124 </test-case> | |
125 | |
126 <test-case id="test16" strength="PRIMARY" locale="fr"> | |
127 <pattern>peche</pattern> | |
128 <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post> | |
129 </test-case> | |
130 | |
131 <test-case id="test17" strength="PRIMARY" locale="fr"> | |
132 <pattern>peche</pattern> | |
133 <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post> | |
134 </test-case> | |
135 | |
136 <test-case id="test18" strength="PRIMARY" locale="fr"> | |
137 <pattern>peche</pattern> | |
138 <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post> | |
139 </test-case> | |
140 | |
141 <test-case id="test19" strength="PRIMARY" locale="fr"> | |
142 <pattern>peche</pattern> | |
143 <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post> | |
144 </test-case> | |
145 | |
146 <test-case id="test20" strength="PRIMARY" locale="es"> | |
147 <pattern>channel</pattern> | |
148 <pre>A </pre><m>channel</m><post>, </post> | |
149 </test-case> | |
150 | |
151 <test-case id="test21" strength="PRIMARY" locale="es"> | |
152 <pattern>channel</pattern> | |
153 <pre>A </pre><m>CHANNEL</m><post>, </post> | |
154 </test-case> | |
155 | |
156 <test-case id="test22" strength="PRIMARY" locale="es"> | |
157 <pattern>channel</pattern> | |
158 <pre>A </pre><m>Channel</m><post>s, </post> | |
159 </test-case> | |
160 | |
161 <test-case id="test23" strength="PRIMARY" locale="es"> | |
162 <pattern>channel</pattern> | |
163 <pre>A </pre><m>channel</m><post>... </post> | |
164 </test-case> | |
165 | |
166 <test-case id="test24" strength="TERTIARY" locale="en"> | |
167 <pattern>A\u0300</pattern> | |
168 <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</p
ost> | |
169 </test-case> | |
170 | |
171 <!-- TODO: In the original test data, this test matched at IDENTICAL streng
th. | |
172 Doesn't seem right. The characters are different. | |
173 --> | |
174 <test-case id="test24a" strength="IDENTICAL" locale="en"> | |
175 <pattern>A\u0300</pattern> | |
176 <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post> | |
177 </test-case> | |
178 | |
179 <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" loca
le="en"> | |
180 <pattern>A\u0300</pattern> | |
181 <pre>At IDENTICAL, shoud this match? </pre> | |
182 <m>\u00c0</m> | |
183 <post></post> | |
184 </test-case> | |
185 | |
186 <test-case id="test25" strength="SECONDARY" locale="en"> | |
187 <pattern>Ű</pattern> | |
188 <pre>12</pre><m>ű</m><post> Ű</post> | |
189 </test-case> | |
190 | |
191 <test-case id="test26" strength="SECONDARY" locale="en"> | |
192 <pattern>A</pattern> | |
193 <pre>12</pre><m>a</m><post>...</post> | |
194 </test-case> | |
195 | |
196 | |
197 <!-- Test Cases from usrchdat.c, VARIABLE --> | |
198 <test-case id="test27" strength="TERTIARY" locale="en"> | |
199 <pattern>blackbird</pattern> | |
200 <pre>black-bird </pre><m>blackbird</m><post>...</post> | |
201 </test-case> | |
202 | |
203 <test-case id="test28" strength="TERTIARY" locale="en"> | |
204 <pattern>go</pattern> | |
205 <pre> on</pre> | |
206 </test-case> | |
207 | |
208 <!-- TODO: this gives a U_ILLEGAL_ARGUMENT error when opening | |
209 the UStringSearch. How did the original test run? --> | |
210 <!-- | |
211 <test-case id="test29" strength="PRIMARY" locale="en"> | |
212 <pattern> </pattern> | |
213 <pre></pre><m></m><post>abc</post> | |
214 </test-case> | |
215 --> | |
216 | |
217 <test-case id="test30" strength="SECONDARY" locale="en"> | |
218 <pattern>abc</pattern> | |
219 <pre> a bc ab c a bc ab c"</pre> | |
220 </test-case> | |
221 | |
222 <test-case id="test31" strength="SECONDARY" locale="en"> | |
223 <pattern>abc</pattern> | |
224 <pre> ---------------</pre> | |
225 </test-case> | |
226 | |
227 | |
228 <!-- Normalization test cases from usrchdat.c --> | |
229 <test-case id="test32" strength="TERTIARY" norm="ON"> | |
230 <pattern>a\u0325\u0300</pattern> | |
231 <pre></pre><m>a\u0300\u0325</m> | |
232 </test-case> | |
233 | |
234 | |
235 <test-case id="test32a" strength="TERTIARY" norm="OFF"> | |
236 <pattern>a\u0325\u0300</pattern> | |
237 <pre>a\u0300\u0325</pre> | |
238 </test-case> | |
239 | |
240 | |
241 <!-- COMPOSITEBOUNDARIES from usrchdat.c | |
242 Boundaries are not identical to the original test data because | |
243 only full combining sequences are matched | |
244 --> | |
245 <test-case id="test40" strength="TERTIARY"> | |
246 <pattern>A</pattern> | |
247 <pre>À</pre> <!-- \u00C0 --> | |
248 </test-case> | |
249 | |
250 <test-case id="test41" strength="TERTIARY"> | |
251 <pattern>A</pattern> | |
252 <pre>À</pre><m>A</m><post>C</post> | |
253 </test-case> | |
254 | |
255 <test-case id="test42" strength="TERTIARY"> | |
256 <pattern>A\u030A</pattern> | |
257 <pre>À\u01FA</pre> | |
258 </test-case> | |
259 | |
260 | |
261 | |
262 <!-- SUPPLEMENTARYCANONICAL from usrchdat.c --> | |
263 <test-case id="test50" strength="TERTIARY"> | |
264 <pattern>\uD800\uDC00</pattern> | |
265 <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m> | |
266 <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post> | |
267 </test-case> | |
268 | |
269 <test-case id="test51" strength="TERTIARY"> | |
270 <pattern>\\uD834\\uDDB9</pattern> | |
271 <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post> | |
272 </test-case> | |
273 | |
274 <test-case id="test52" strength="TERTIARY"> | |
275 <pattern> \\uD834\\uDDB9 </pattern> | |
276 <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post> | |
277 </test-case> | |
278 | |
279 <test-case id="test53" strength="TERTIARY"> | |
280 <pattern>-\\uD834\\uDDB9-</pattern> | |
281 <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post> | |
282 </test-case> | |
283 | |
284 <test-case id="test54" strength="TERTIARY"> | |
285 <pattern>,\\uD834\\uDDB9,</pattern> | |
286 <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post> | |
287 </test-case> | |
288 | |
289 <test-case id="test55" strength="TERTIARY"> | |
290 <pattern>?\\uD834\\uDDB9?</pattern> | |
291 <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post> | |
292 </test-case> | |
293 | |
294 | |
295 <!-- Long combining sequences --> | |
296 <!-- Backwards search fails because the pattern ends with ignorables | |
297 <test-case id="test60" strength="PRIMARY"> | |
298 <pattern>A\u0301\u0301\u0301\u0301</pattern> | |
299 <m>A\u0301\u0301\u0301\u0301\u0301</m> | |
300 </test-case> | |
301 --> | |
302 | |
303 <test-case id="test61" strength="TERTIARY"> | |
304 <pattern>A\u0301\u0301\u0301\u0301</pattern> | |
305 <pre>A\u0301\u0301\u0301\u0301\u0301</pre> | |
306 </test-case> | |
307 | |
308 <test-case id="test62" strength="TERTIARY"> | |
309 <pattern>A\u0301\u0301\u0301\u0301</pattern> | |
310 <m>A\u0301\u0301\u0301\u0301</m> | |
311 </test-case> | |
312 | |
313 <!-- stand-alone combining marks don't match attached marks --> | |
314 <test-case id="test63" strength="TERTIARY"> | |
315 <pattern>\u0301</pattern> | |
316 <pre>A\u0301\u0301\u0301\u0301</pre> | |
317 </test-case> | |
318 | |
319 <test-case id="test64" strength="TERTIARY"> | |
320 <pattern>\u0301</pattern> | |
321 <post>\u0301\u0301\u0301\u0301</post> | |
322 </test-case> | |
323 | |
324 <!-- stand-alone combining mark does match an un-attached combining mark --> | |
325 <test-case id="test65" strength="TERTIARY"> | |
326 <pattern>\u0301</pattern> | |
327 <m>\u0301</m><post>A\u0301\u0301</post> | |
328 </test-case> | |
329 | |
330 <test-case id="test66" strength="TERTIARY"> | |
331 <pattern>\u0301</pattern> | |
332 <m>\u0301</m> | |
333 </test-case> | |
334 | |
335 <!-- stand-alone combining marks at end of the target text --> | |
336 <test-case id="test67" strength="TERTIARY"> | |
337 <pattern>\u0301</pattern> | |
338 <pre>abcd\r</pre><m>\u0301</m> | |
339 </test-case> | |
340 | |
341 <!-- attached combining marks at end of the target text, no match --> | |
342 <test-case id="test68" strength="TERTIARY"> | |
343 <pattern>\u0301</pattern> | |
344 <pre>abcd\u0301</pre> | |
345 </test-case> | |
346 | |
347 | |
348 | |
349 <!-- no match within expansions at the start --> | |
350 <test-case id="test70" strength="PRIMARY"> | |
351 <pattern>Eligature</pattern> | |
352 <pre>Æligature</pre> | |
353 </test-case> | |
354 | |
355 <test-case id="test71" strength="PRIMARY"> | |
356 <pattern>AEligature</pattern> | |
357 <m>Æligature</m> | |
358 </test-case> | |
359 | |
360 <test-case id="test72" strength="PRIMARY"> | |
361 <pattern>AEligature</pattern> | |
362 <m>Æligature</m> | |
363 </test-case> | |
364 | |
365 <!-- unattached combining Tilde will not match a Tilde that is | |
366 part of a composed Ñ (\u00D1) --> | |
367 <test-case id="test73" strength="SECONDARY"> | |
368 <pattern>\u0303</pattern> <!-- combining tilde --> | |
369 <pre>Ñ
</pre><m>\u0303</m> | |
370 </test-case> | |
371 | |
372 <test-case id="test74" strength="SECONDARY"> | |
373 <pattern>\u0303</pattern> <!-- combining tilde --> | |
374 <pre>Ñ 
</pre><m>\u0303</m><post>a</post> | |
375 </test-case> | |
376 | |
377 <test-case id="test75" strength="TERTIARY" locale="fr"> | |
378 <pattern>\u00EA</pattern> | |
379 <pre>p</pre><m>\u00EA</m><post>che</post> | |
380 </test-case> | |
381 | |
382 <test-case id="test76" strength="TERTIARY" locale="fr"> | |
383 <pattern>\u00EA</pattern> | |
384 <pre>p</pre><m>e\u0302</m><post>che</post> | |
385 </test-case> | |
386 | |
387 <test-case id="test77" strength="TERTIARY" locale="fr"> | |
388 <pattern>e\u0302</pattern> | |
389 <pre>p</pre><m>\u00EA</m><post>che</post> | |
390 </test-case> | |
391 | |
392 <!-- Test cases from ticket:5382 --> | |
393 <test-case id="test78" strength="SECONDARY" locale="hu_HU"> | |
394 <pattern>\u0170</pattern> | |
395 <m>\u0171</m> | |
396 <post>12</post> | |
397 </test-case> | |
398 | |
399 <test-case id="test79" strength="SECONDARY" locale="hu_HU"> | |
400 <pattern>\u0170</pattern> | |
401 <pre>1</pre> | |
402 <m>\u0171</m> | |
403 <post>2</post> | |
404 </test-case> | |
405 | |
406 <test-case id="test80" strength="SECONDARY" locale="hu_HU"> | |
407 <pattern>\u0170</pattern> | |
408 <pre>12</pre> | |
409 <m>\u0171</m> | |
410 </test-case> | |
411 | |
412 <!-- Test cases from ticket:5959 --> | |
413 <test-case id="test81" strength="SECONDARY"> | |
414 <pattern>\u2166</pattern> | |
415 <m>VII</m> | |
416 </test-case> | |
417 | |
418 <test-case id="test82" strength="SECONDARY"> | |
419 <pattern>VII</pattern> | |
420 <m>\u2166</m> | |
421 </test-case> | |
422 | |
423 <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" local
e="en"> | |
424 <pattern>Universal Declaration of Human Rights</pattern> | |
425 <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post>
as a common standard of achievement for all peoples and all nations</post> | |
426 </test-case> | |
427 | |
428 <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" local
e="en"> | |
429 <pattern>Universal Declaration of Human Rights</pattern> | |
430 <pre>Proclaims this </pre> | |
431 <m>Universal-Declaration-of-Human-Rights</m> | |
432 <post> as a common standard of achievement for all peoples and all nations</
post> | |
433 </test-case> | |
434 | |
435 <test-case id="test84" strength="TERTIARY" locale="en"> | |
436 <pattern>\u05E9\u0591\u05E9</pattern> | |
437 <m>\u05E9\u0592\u05E9</m> | |
438 </test-case> | |
439 | |
440 <test-case id="test84b" strength="IDENTICAL" locale="en"> | |
441 <pattern>\u05E9\u0591\u05E9</pattern> | |
442 <pre>\u05E9\u0592\u05E9</pre> | |
443 </test-case> | |
444 </stringsearch-tests> | |
445 | |
OLD | NEW |