OLD | NEW |
(Empty) | |
| 1 <?xml version="1.0" encoding="UTF-8"?> |
| 2 |
| 3 <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved --> |
| 4 |
| 5 <!-- Test data file for string search --> |
| 6 <!DOCTYPE stringsearch-tests [ |
| 7 <!ELEMENT stringsearch-tests (test-case+)> <!-- root element: one or more test cases --> |
| 8 <!ATTLIST stringsearch-tests debug IDREF #IMPLIED > <!-- optional; must name the id of a test-case in this file --> |
| 9 <!ELEMENT test-case (pattern, pre?, m?, post?)> <!-- judging by the cases below: pre = text before the match, m = expected match (omitted when no match is expected), post = text after it --> |
| 10 <!ATTLIST test-case |
| 11 id ID #REQUIRED |
| 12 locale CDATA "en" |
| 13 strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" |
| 14 norm (ON | OFF) "OFF" |
| 15 alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE" |
| 16 > |
| 17 |
| 18 <!ELEMENT pattern (#PCDATA)> |
| 19 <!ELEMENT pre (#PCDATA)> |
| 20 <!ELEMENT m (#PCDATA)> |
| 21 <!ELEMENT post (#PCDATA)> |
| 22 ]> |
| 23 |
| 24 <stringsearch-tests> |
| 25 <!-- debug="test11" (for copying into the above element) --> |
| 26 |
| 27 <!-- Very simple match --> |
| 28 <test-case id="test01" > |
| 29 <pattern>abc</pattern> |
| 30 <pre>xxx</pre><m>abc</m><post>yyy</post> |
| 31 </test-case> |
| 32 |
| 33 <!-- Very simple no-match --> |
| 34 <test-case id="test02" > |
| 35 <pattern>abc</pattern> |
| 36 <pre>xxx</pre><post>yyy</post> |
| 37 </test-case> |
| 38 |
| 39 <!-- Match after several near-misses: the pre text holds words that differ from the pattern by one letter, followed by a truncated "stri". --> |
| 40 <test-case id="test03" > |
| 41 <pattern>string</pattern> |
| 42 <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post> |
| 43 </test-case> |
| 44 |
| 45 <test-case id="test04" strength="PRIMARY" > |
| 46 <pattern>FUSS</pattern> |
| 47 <pre>abc</pre><m>fuss</m><post>sss</post> |
| 48 </test-case> |
| 49 |
| 50 <test-case id="test05" strength="PRIMARY" > |
| 51 <pattern>FUSS</pattern> |
| 52 <pre>abc</pre><m>fuß</m><post>sss</post> |
| 53 </test-case> |
| 54 |
| 55 <test-case id="test05.5" strength="PRIMARY" > |
| 56 <pattern>fuss</pattern> |
| 57 <pre>a </pre> |
| 58 <m>fuß</m> |
| 59 <post>ball table</post> |
| 60 </test-case> |
| 61 |
| 62 <test-case id="test06" strength="PRIMARY" > |
| 63 <pattern>fuß</pattern> |
| 64 <pre>abc</pre><m>fuss</m><post>xyz</post> |
| 65 </test-case> |
| 66 |
| 67 <test-case id="test07" strength="SECONDARY" > |
| 68 <pattern>fuß</pattern> |
| 69 <pre>abcfussxyz</pre> |
| 70 </test-case> |
| 71 |
| 72 <test-case id="test08" strength="PRIMARY" > |
| 73 <pattern>fus</pattern> |
| 74 <pre>abcfuß</pre><post>xyz</post> |
| 75 </test-case> |
| 76 |
| 77 <!-- A good match following an initial match that failed because |
| 78 of not ending on a character boundary --> |
| 79 <test-case id="test09" strength="PRIMARY"> |
| 80 <pattern>fus</pattern> |
| 81 <pre>fuß </pre><m>fus</m><post>sss</post> |
| 82 </test-case> |
| 83 |
| 84 |
| 85 <!-- Test cases from usrchdat.c BREAKITERATOREXACT --> |
| 86 |
| 87 <test-case id="test10" strength="TERTIARY"> |
| 88 <pattern>fox</pattern> |
| 89 <m>fox</m><post>y fox</post> |
| 90 </test-case> |
| 91 |
| 92 <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook"> <!-- expects pattern "toe" to match "Tö" with phonebook collation at PRIMARY strength --> |
| 93 <pattern>toe</pattern> |
| 94 <pre>This is a </pre><m>Tö</m><post>ne</post> |
| 95 </test-case> |
| 96 |
| 97 <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook"> <!-- same pattern and locale at SECONDARY strength: no m element, so no match is expected in "Töne" --> |
| 98 <pattern>toe</pattern> |
| 99 <pre>This is a </pre><post>Töne</post> |
| 100 </test-case> |
| 101 |
| 102 <test-case id="test12" strength="TERTIARY"> |
| 103 <pattern>e</pattern> |
| 104 <pre>tésting that é doés not match </pre><m>e</m><post></post> |
| 105 </test-case> |
| 106 |
| 107 <test-case id="test13" strength="PRIMARY" locale="fr"> |
| 108 <pattern>e</pattern> |
| 109 <pre></pre><m>É</m><post>É</post> |
| 110 </test-case> |
| 111 |
| 112 <test-case id="test14" strength="PRIMARY" locale="fr"> |
| 113 <pattern>O</pattern> |
| 114 <pre>C</pre><m>O\u0302</m><post>TÉ</post> |
| 115 </test-case> |
| 116 |
| 117 |
| 118 <!-- Test cases from usrchdat.c STRENGTH --> |
| 119 |
| 120 |
| 121 <test-case id="test15" strength="PRIMARY" locale="en"> <!-- expected match is the stand-alone "fox"; the post text also ends with the word "foxes" --> |
| 122 <pattern>fox</pattern> |
| 123 <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post> |
| 124 </test-case> |
| 125 |
| 126 <test-case id="test16" strength="PRIMARY" locale="fr"> |
| 127 <pattern>peche</pattern> |
| 128 <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post> |
| 129 </test-case> |
| 130 |
| 131 <test-case id="test17" strength="PRIMARY" locale="fr"> |
| 132 <pattern>peche</pattern> |
| 133 <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post> |
| 134 </test-case> |
| 135 |
| 136 <test-case id="test18" strength="PRIMARY" locale="fr"> |
| 137 <pattern>peche</pattern> |
| 138 <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post> |
| 139 </test-case> |
| 140 |
| 141 <test-case id="test19" strength="PRIMARY" locale="fr"> |
| 142 <pattern>peche</pattern> |
| 143 <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post> |
| 144 </test-case> |
| 145 |
| 146 <test-case id="test20" strength="PRIMARY" locale="es"> |
| 147 <pattern>channel</pattern> |
| 148 <pre>A </pre><m>channel</m><post>, </post> |
| 149 </test-case> |
| 150 |
| 151 <test-case id="test21" strength="PRIMARY" locale="es"> |
| 152 <pattern>channel</pattern> |
| 153 <pre>A </pre><m>CHANNEL</m><post>, </post> |
| 154 </test-case> |
| 155 |
| 156 <test-case id="test22" strength="PRIMARY" locale="es"> |
| 157 <pattern>channel</pattern> |
| 158 <pre>A </pre><m>Channel</m><post>s, </post> |
| 159 </test-case> |
| 160 |
| 161 <test-case id="test23" strength="PRIMARY" locale="es"> |
| 162 <pattern>channel</pattern> |
| 163 <pre>A </pre><m>channel</m><post>... </post> |
| 164 </test-case> |
| 165 |
| 166 <test-case id="test24" strength="TERTIARY" locale="en"> <!-- decomposed pattern (A + U+0300) is expected to match the precomposed \u00c0 but not the bare "A" characters around it --> |
| 167 <pattern>A\u0300</pattern> |
| 168 <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post> <!-- NOTE(review): the trailing quote in post looks like a leftover from a C string literal; confirm it is intended --> |
| 169 </test-case> |
| 170 |
| 171 <!-- TODO: In the original test data, this test matched at IDENTICAL strength. |
| 172 Doesn't seem right. The characters are different. |
| 173 --> |
| 174 <test-case id="test24a" strength="IDENTICAL" locale="en"> |
| 175 <pattern>A\u0300</pattern> |
| 176 <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post> |
| 177 </test-case> |
| 178 |
| 179 <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en"> <!-- same pattern and text as test24a, run with alternate_handling="SHIFTED" --> |
| 180 <pattern>A\u0300</pattern> |
| 181 <pre>At IDENTICAL, shoud this match? </pre> |
| 182 <m>\u00c0</m> |
| 183 <post></post> |
| 184 </test-case> |
| 185 |
| 186 <test-case id="test25" strength="SECONDARY" locale="en"> |
| 187 <pattern>Ű</pattern> |
| 188 <pre>12</pre><m>ű</m><post> Ű</post> |
| 189 </test-case> |
| 190 |
| 191 <test-case id="test26" strength="SECONDARY" locale="en"> |
| 192 <pattern>A</pattern> |
| 193 <pre>12</pre><m>a</m><post>...</post> |
| 194 </test-case> |
| 195 |
| 196 |
| 197 <!-- Test Cases from usrchdat.c, VARIABLE --> |
| 198 <test-case id="test27" strength="TERTIARY" locale="en"> |
| 199 <pattern>blackbird</pattern> |
| 200 <pre>black-bird </pre><m>blackbird</m><post>...</post> |
| 201 </test-case> |
| 202 |
| 203 <test-case id="test28" strength="TERTIARY" locale="en"> |
| 204 <pattern>go</pattern> |
| 205 <pre> on</pre> |
| 206 </test-case> |
| 207 |
| 208 <!-- TODO: this gives a U_ILLEGAL_ARGUMENT error when opening |
| 209 the UStringSearch. How did the original test run? --> |
| 210 <!-- |
| 211 <test-case id="test29" strength="PRIMARY" locale="en"> |
| 212 <pattern> </pattern> |
| 213 <pre></pre><m></m><post>abc</post> |
| 214 </test-case> |
| 215 --> |
| 216 |
| 217 <test-case id="test30" strength="SECONDARY" locale="en"> |
| 218 <pattern>abc</pattern> |
| 219 <pre> a bc ab c a bc ab c"</pre> |
| 220 </test-case> |
| 221 |
| 222 <test-case id="test31" strength="SECONDARY" locale="en"> |
| 223 <pattern>abc</pattern> |
| 224 <pre> ---------------</pre> |
| 225 </test-case> |
| 226 |
| 227 |
| 228 <!-- Normalization test cases from usrchdat.c --> |
| 229 <test-case id="test32" strength="TERTIARY" norm="ON"> |
| 230 <pattern>a\u0325\u0300</pattern> |
| 231 <pre></pre><m>a\u0300\u0325</m> |
| 232 </test-case> |
| 233 |
| 234 |
| 235 <test-case id="test32a" strength="TERTIARY" norm="OFF"> |
| 236 <pattern>a\u0325\u0300</pattern> |
| 237 <pre>a\u0300\u0325</pre> |
| 238 </test-case> |
| 239 |
| 240 |
| 241 <!-- COMPOSITEBOUNDARIES from usrchdat.c |
| 242 Boundaries are not identical to the original test data because |
| 243 only full combining sequences are matched |
| 244 --> |
| 245 <test-case id="test40" strength="TERTIARY"> |
| 246 <pattern>A</pattern> |
| 247 <pre>À</pre> <!-- \u00C0 --> |
| 248 </test-case> |
| 249 |
| 250 <test-case id="test41" strength="TERTIARY"> |
| 251 <pattern>A</pattern> |
| 252 <pre>À</pre><m>A</m><post>C</post> |
| 253 </test-case> |
| 254 |
| 255 <test-case id="test42" strength="TERTIARY"> |
| 256 <pattern>A\u030A</pattern> |
| 257 <pre>À\u01FA</pre> |
| 258 </test-case> |
| 259 |
| 260 |
| 261 |
| 262 <!-- SUPPLEMENTARYCANONICAL from usrchdat.c --> |
| 263 <test-case id="test50" strength="TERTIARY"> |
| 264 <pattern>\uD800\uDC00</pattern> |
| 265 <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m> |
| 266 <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post> |
| 267 </test-case> |
| 268 |
| 269 <test-case id="test51" strength="TERTIARY"> <!-- NOTE(review): test51 through test55 write the escape with doubled backslashes (\\uD834\\uDDB9) while test50 uses single ones; presumably carried over from C string escaping. Confirm how the harness unescapes this text. --> |
| 270 <pattern>\\uD834\\uDDB9</pattern> |
| 271 <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post> |
| 272 </test-case> |
| 273 |
| 274 <test-case id="test52" strength="TERTIARY"> |
| 275 <pattern> \\uD834\\uDDB9 </pattern> |
| 276 <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post> |
| 277 </test-case> |
| 278 |
| 279 <test-case id="test53" strength="TERTIARY"> |
| 280 <pattern>-\\uD834\\uDDB9-</pattern> |
| 281 <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post> |
| 282 </test-case> |
| 283 |
| 284 <test-case id="test54" strength="TERTIARY"> |
| 285 <pattern>,\\uD834\\uDDB9,</pattern> |
| 286 <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post> |
| 287 </test-case> |
| 288 |
| 289 <test-case id="test55" strength="TERTIARY"> |
| 290 <pattern>?\\uD834\\uDDB9?</pattern> |
| 291 <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post> |
| 292 </test-case> |
| 293 |
| 294 |
| 295 <!-- Long combining sequences --> |
| 296 <!-- Backwards search fails because the pattern ends with ignorables |
| 297 <test-case id="test60" strength="PRIMARY"> |
| 298 <pattern>A\u0301\u0301\u0301\u0301</pattern> |
| 299 <m>A\u0301\u0301\u0301\u0301\u0301</m> |
| 300 </test-case> |
| 301 --> |
| 302 |
| 303 <test-case id="test61" strength="TERTIARY"> |
| 304 <pattern>A\u0301\u0301\u0301\u0301</pattern> |
| 305 <pre>A\u0301\u0301\u0301\u0301\u0301</pre> |
| 306 </test-case> |
| 307 |
| 308 <test-case id="test62" strength="TERTIARY"> |
| 309 <pattern>A\u0301\u0301\u0301\u0301</pattern> |
| 310 <m>A\u0301\u0301\u0301\u0301</m> |
| 311 </test-case> |
| 312 |
| 313 <!-- stand-alone combining marks don't match attached marks --> |
| 314 <test-case id="test63" strength="TERTIARY"> |
| 315 <pattern>\u0301</pattern> |
| 316 <pre>A\u0301\u0301\u0301\u0301</pre> |
| 317 </test-case> |
| 318 |
| 319 <test-case id="test64" strength="TERTIARY"> |
| 320 <pattern>\u0301</pattern> |
| 321 <post>\u0301\u0301\u0301\u0301</post> |
| 322 </test-case> |
| 323 |
| 324 <!-- stand-alone combining mark does match an un-attached combining mark --> |
| 325 <test-case id="test65" strength="TERTIARY"> |
| 326 <pattern>\u0301</pattern> |
| 327 <m>\u0301</m><post>A\u0301\u0301</post> |
| 328 </test-case> |
| 329 |
| 330 <test-case id="test66" strength="TERTIARY"> |
| 331 <pattern>\u0301</pattern> |
| 332 <m>\u0301</m> |
| 333 </test-case> |
| 334 |
| 335 <!-- stand-alone combining marks at end of the target text --> |
| 336 <test-case id="test67" strength="TERTIARY"> |
| 337 <pattern>\u0301</pattern> |
| 338 <pre>abcd\r</pre><m>\u0301</m> |
| 339 </test-case> |
| 340 |
| 341 <!-- attached combining marks at end of the target text, no match --> |
| 342 <test-case id="test68" strength="TERTIARY"> |
| 343 <pattern>\u0301</pattern> |
| 344 <pre>abcd\u0301</pre> |
| 345 </test-case> |
| 346 |
| 347 |
| 348 |
| 349 <!-- no match within expansions at the start --> |
| 350 <test-case id="test70" strength="PRIMARY"> |
| 351 <pattern>Eligature</pattern> |
| 352 <pre>Æligature</pre> |
| 353 </test-case> |
| 354 |
| 355 <test-case id="test71" strength="PRIMARY"> |
| 356 <pattern>AEligature</pattern> |
| 357 <m>Æligature</m> |
| 358 </test-case> |
| 359 |
| 360 <test-case id="test72" strength="PRIMARY"> <!-- NOTE(review): pattern, strength and expected match are identical to test71; confirm whether a different input was intended here --> |
| 361 <pattern>AEligature</pattern> |
| 362 <m>Æligature</m> |
| 363 </test-case> |
| 364 |
| 365 <!-- unattached combining Tilde will not match a Tilde that is |
| 366 part of a composed Ñ (\u00D1) --> |
| 367 <test-case id="test73" strength="SECONDARY"> |
| 368 <pattern>\u0303</pattern> <!-- combining tilde --> |
| 369 <pre>Ñ
</pre><m>\u0303</m> |
| 370 </test-case> |
| 371 |
| 372 <test-case id="test74" strength="SECONDARY"> |
| 373 <pattern>\u0303</pattern> <!-- combining tilde --> |
| 374 <pre>Ñ 
</pre><m>\u0303</m><post>a</post> |
| 375 </test-case> |
| 376 |
| 377 <test-case id="test75" strength="TERTIARY" locale="fr"> |
| 378 <pattern>\u00EA</pattern> |
| 379 <pre>p</pre><m>\u00EA</m><post>che</post> |
| 380 </test-case> |
| 381 |
| 382 <test-case id="test76" strength="TERTIARY" locale="fr"> |
| 383 <pattern>\u00EA</pattern> |
| 384 <pre>p</pre><m>e\u0302</m><post>che</post> |
| 385 </test-case> |
| 386 |
| 387 <test-case id="test77" strength="TERTIARY" locale="fr"> |
| 388 <pattern>e\u0302</pattern> |
| 389 <pre>p</pre><m>\u00EA</m><post>che</post> |
| 390 </test-case> |
| 391 |
| 392 <!-- Test cases from ticket:5382 --> |
| 393 <test-case id="test78" strength="SECONDARY" locale="hu_HU"> |
| 394 <pattern>\u0170</pattern> |
| 395 <m>\u0171</m> |
| 396 <post>12</post> |
| 397 </test-case> |
| 398 |
| 399 <test-case id="test79" strength="SECONDARY" locale="hu_HU"> |
| 400 <pattern>\u0170</pattern> |
| 401 <pre>1</pre> |
| 402 <m>\u0171</m> |
| 403 <post>2</post> |
| 404 </test-case> |
| 405 |
| 406 <test-case id="test80" strength="SECONDARY" locale="hu_HU"> |
| 407 <pattern>\u0170</pattern> |
| 408 <pre>12</pre> |
| 409 <m>\u0171</m> |
| 410 </test-case> |
| 411 |
| 412 <!-- Test cases from ticket:5959 --> |
| 413 <test-case id="test81" strength="SECONDARY"> |
| 414 <pattern>\u2166</pattern> |
| 415 <m>VII</m> |
| 416 </test-case> |
| 417 |
| 418 <test-case id="test82" strength="SECONDARY"> |
| 419 <pattern>VII</pattern> |
| 420 <m>\u2166</m> |
| 421 </test-case> |
| 422 |
| 423 <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en"> <!-- IDENTICAL strength with SHIFTED alternate handling; the expected match is the pattern text verbatim --> |
| 424 <pattern>Universal Declaration of Human Rights</pattern> |
| 425 <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post> <!-- NOTE(review): the space after the post start tag is assumed, mirroring test83b; confirm against the original data --> |
| 426 </test-case> |
| 427 |
| 428 <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en"> <!-- TERTIARY strength with SHIFTED alternate handling: the space-separated pattern is expected to match the hyphen-separated text --> |
| 429 <pattern>Universal Declaration of Human Rights</pattern> |
| 430 <pre>Proclaims this </pre> |
| 431 <m>Universal-Declaration-of-Human-Rights</m> |
| 432 <post> as a common standard of achievement for all peoples and all nations</post> |
| 433 </test-case> |
| 434 |
| 435 <test-case id="test84" strength="TERTIARY" locale="en"> |
| 436 <pattern>\u05E9\u0591\u05E9</pattern> |
| 437 <m>\u05E9\u0592\u05E9</m> |
| 438 </test-case> |
| 439 |
| 440 <test-case id="test84b" strength="IDENTICAL" locale="en"> |
| 441 <pattern>\u05E9\u0591\u05E9</pattern> |
| 442 <pre>\u05E9\u0592\u05E9</pre> |
| 443 </test-case> |
| 444 </stringsearch-tests> |
| 445 |
OLD | NEW |