OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * COPYRIGHT: | |
3 * Copyright (c) 1997-2014, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ********************************************************************/ | |
6 | |
7 #include "unicode/utypes.h" | |
8 | |
9 #if !UCONFIG_NO_COLLATION | |
10 | |
11 #include "unicode/coll.h" | |
12 #include "unicode/localpointer.h" | |
13 #include "unicode/tblcoll.h" | |
14 #include "unicode/unistr.h" | |
15 #include "unicode/sortkey.h" | |
16 #include "regcoll.h" | |
17 #include "sfwdchit.h" | |
18 #include "testutil.h" | |
19 #include "cmemory.h" | |
20 | |
// Element count of a true array (not a pointer), expressed as an int32_t.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
22 | |
23 CollationRegressionTest::CollationRegressionTest() | |
24 { | |
25 UErrorCode status = U_ZERO_ERROR; | |
26 | |
27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), statu
s); | |
28 if(U_FAILURE(status)) { | |
29 delete en_us; | |
30 en_us = 0; | |
31 errcheckln(status, "Collator creation failed with %s", u_errorName(status)
); | |
32 return; | |
33 } | |
34 } | |
35 | |
36 CollationRegressionTest::~CollationRegressionTest() | |
37 { | |
38 delete en_us; | |
39 } | |
40 | |
41 | |
42 // @bug 4048446 | |
43 // | |
44 // CollationElementIterator.reset() doesn't work | |
45 // | |
46 void CollationRegressionTest::Test4048446(/* char* par */) | |
47 { | |
48 const UnicodeString test1 = "XFILE What subset of all possible test cases ha
s the highest probability of detecting the most errors?"; | |
49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases h
as the lowest probability of detecting the least errors?"; | |
50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); | |
51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); | |
52 UErrorCode status = U_ZERO_ERROR; | |
53 | |
54 if (i1 == NULL|| i2 == NULL) | |
55 { | |
56 errln("Could not create CollationElementIterator's"); | |
57 delete i1; | |
58 delete i2; | |
59 return; | |
60 } | |
61 | |
62 while (i1->next(status) != CollationElementIterator::NULLORDER) | |
63 { | |
64 if (U_FAILURE(status)) | |
65 { | |
66 errln("error calling next()"); | |
67 | |
68 delete i1; | |
69 delete i2; | |
70 return; | |
71 } | |
72 } | |
73 | |
74 i1->reset(); | |
75 | |
76 assertEqual(*i1, *i2); | |
77 | |
78 delete i1; | |
79 delete i2; | |
80 } | |
81 | |
82 // @bug 4051866 | |
83 // | |
84 // Collator -> rules -> Collator round-trip broken for expanding characters | |
85 // | |
86 void CollationRegressionTest::Test4051866(/* char* par */) | |
87 { | |
88 UnicodeString rules; | |
89 UErrorCode status = U_ZERO_ERROR; | |
90 | |
91 rules += "&n < o "; | |
92 rules += "& oe ,o"; | |
93 rules += (UChar)0x3080; | |
94 rules += "& oe ,"; | |
95 rules += (UChar)0x1530; | |
96 rules += " ,O"; | |
97 rules += "& OE ,O"; | |
98 rules += (UChar)0x3080; | |
99 rules += "& OE ,"; | |
100 rules += (UChar)0x1520; | |
101 rules += "< p ,P"; | |
102 | |
103 // Build a collator containing expanding characters | |
104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), sta
tus); | |
105 if (U_FAILURE(status)) { | |
106 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status))
; | |
107 return; | |
108 } | |
109 | |
110 // Build another using the rules from the first | |
111 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), sta
tus), status); | |
112 if (U_FAILURE(status)) { | |
113 errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_err
orName(status)); | |
114 return; | |
115 } | |
116 | |
117 // Make sure they're the same | |
118 if (!(c1->getRules() == c2->getRules())) | |
119 { | |
120 errln("Rules are not equal"); | |
121 } | |
122 } | |
123 | |
124 // @bug 4053636 | |
125 // | |
126 // Collator thinks "black-bird" == "black" | |
127 // | |
128 void CollationRegressionTest::Test4053636(/* char* par */) | |
129 { | |
130 if (en_us->equals("black_bird", "black")) | |
131 { | |
132 errln("black-bird == black"); | |
133 } | |
134 } | |
135 | |
136 // @bug 4054238 | |
137 // | |
138 // CollationElementIterator will not work correctly if the associated | |
139 // Collator object's mode is changed | |
140 // | |
141 void CollationRegressionTest::Test4054238(/* char* par */) | |
142 { | |
143 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x
72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; | |
144 const UnicodeString test3(chars3); | |
145 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
146 | |
147 // NOTE: The Java code uses en_us to create the CollationElementIterators | |
148 // but I'm pretty sure that's wrong, so I've changed this to use c. | |
149 UErrorCode status = U_ZERO_ERROR; | |
150 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
151 CollationElementIterator *i1 = c->createCollationElementIterator(test3); | |
152 delete i1; | |
153 delete c; | |
154 } | |
155 | |
156 // @bug 4054734 | |
157 // | |
158 // Collator::IDENTICAL documented but not implemented | |
159 // | |
160 void CollationRegressionTest::Test4054734(/* char* par */) | |
161 { | |
162 /* | |
163 Here's the original Java: | |
164 | |
165 String[] decomp = { | |
166 "\u0001", "<", "\u0002", | |
167 "\u0001", "=", "\u0001", | |
168 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compar
ed bitwise | |
169 "\u00C0", "=", "A\u0300" // Decomp should make these equa
l | |
170 }; | |
171 | |
172 String[] nodecomp = { | |
173 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave | |
174 }; | |
175 */ | |
176 | |
177 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
178 { | |
179 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, | |
180 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, | |
181 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, | |
182 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} | |
183 }; | |
184 | |
185 | |
186 UErrorCode status = U_ZERO_ERROR; | |
187 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
188 | |
189 c->setStrength(Collator::IDENTICAL); | |
190 | |
191 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
192 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); | |
193 | |
194 delete c; | |
195 } | |
196 | |
197 // @bug 4054736 | |
198 // | |
199 // Full Decomposition mode not implemented | |
200 // | |
201 void CollationRegressionTest::Test4054736(/* char* par */) | |
202 { | |
203 UErrorCode status = U_ZERO_ERROR; | |
204 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
205 | |
206 c->setStrength(Collator::SECONDARY); | |
207 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
208 | |
209 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
210 { | |
211 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed | |
212 }; | |
213 | |
214 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
215 | |
216 delete c; | |
217 } | |
218 | |
219 // @bug 4058613 | |
220 // | |
221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korea
n | |
222 // | |
223 void CollationRegressionTest::Test4058613(/* char* par */) | |
224 { | |
225 // Creating a default collator doesn't work when Korean is the default | |
226 // locale | |
227 | |
228 Locale oldDefault = Locale::getDefault(); | |
229 UErrorCode status = U_ZERO_ERROR; | |
230 | |
231 Locale::setDefault(Locale::getKorean(), status); | |
232 | |
233 if (U_FAILURE(status)) | |
234 { | |
235 errln("Could not set default locale to Locale::KOREAN"); | |
236 return; | |
237 } | |
238 | |
239 Collator *c = NULL; | |
240 | |
241 c = Collator::createInstance("en_US", status); | |
242 | |
243 if (c == NULL || U_FAILURE(status)) | |
244 { | |
245 errln("Could not create a Korean collator"); | |
246 Locale::setDefault(oldDefault, status); | |
247 delete c; | |
248 return; | |
249 } | |
250 | |
251 // Since the fix to this bug was to turn off decomposition for Korean collat
ors, | |
252 // ensure that's what we got | |
253 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) | |
254 { | |
255 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); | |
256 } | |
257 | |
258 delete c; | |
259 | |
260 Locale::setDefault(oldDefault, status); | |
261 } | |
262 | |
263 // @bug 4059820 | |
264 // | |
265 // RuleBasedCollator.getRules does not return the exact pattern as input | |
266 // for expanding character sequences | |
267 // | |
268 void CollationRegressionTest::Test4059820(/* char* par */) | |
269 { | |
270 UErrorCode status = U_ZERO_ERROR; | |
271 | |
272 RuleBasedCollator *c = NULL; | |
273 UnicodeString rules = "&9 < a < b , c/a < d < z"; | |
274 | |
275 c = new RuleBasedCollator(rules, status); | |
276 | |
277 if (c == NULL || U_FAILURE(status)) | |
278 { | |
279 errln("Failure building a collator."); | |
280 delete c; | |
281 return; | |
282 } | |
283 | |
284 if ( c->getRules().indexOf("c/a") == -1) | |
285 { | |
286 errln("returned rules do not contain 'c/a'"); | |
287 } | |
288 | |
289 delete c; | |
290 } | |
291 | |
292 // @bug 4060154 | |
293 // | |
294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" | |
295 // | |
296 void CollationRegressionTest::Test4060154(/* char* par */) | |
297 { | |
298 UErrorCode status = U_ZERO_ERROR; | |
299 UnicodeString rules; | |
300 | |
301 rules += "&f < g, G < h, H < i, I < j, J"; | |
302 rules += " & H < "; | |
303 rules += (UChar)0x0131; | |
304 rules += ", "; | |
305 rules += (UChar)0x0130; | |
306 rules += ", i, I"; | |
307 | |
308 RuleBasedCollator *c = NULL; | |
309 | |
310 c = new RuleBasedCollator(rules, status); | |
311 | |
312 if (c == NULL || U_FAILURE(status)) | |
313 { | |
314 errln("failure building collator."); | |
315 delete c; | |
316 return; | |
317 } | |
318 | |
319 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
320 | |
321 /* | |
322 String[] tertiary = { | |
323 "A", "<", "B", | |
324 "H", "<", "\u0131", | |
325 "H", "<", "I", | |
326 "\u0131", "<", "\u0130", | |
327 "\u0130", "<", "i", | |
328 "\u0130", ">", "H", | |
329 }; | |
330 */ | |
331 | |
332 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
333 { | |
334 {0x41, 0}, {0x3c, 0}, {0x42, 0}, | |
335 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, | |
336 {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
337 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, | |
338 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, | |
339 {0x0130, 0}, {0x3e, 0}, {0x48, 0} | |
340 }; | |
341 | |
342 c->setStrength(Collator::TERTIARY); | |
343 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); | |
344 | |
345 /* | |
346 String[] secondary = { | |
347 "H", "<", "I", | |
348 "\u0131", "=", "\u0130", | |
349 }; | |
350 */ | |
351 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
352 { | |
353 {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
354 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} | |
355 }; | |
356 | |
357 c->setStrength(Collator::PRIMARY); | |
358 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); | |
359 | |
360 delete c; | |
361 } | |
362 | |
363 // @bug 4062418 | |
364 // | |
365 // Secondary/Tertiary comparison incorrect in French Secondary | |
366 // | |
367 void CollationRegressionTest::Test4062418(/* char* par */) | |
368 { | |
369 UErrorCode status = U_ZERO_ERROR; | |
370 | |
371 RuleBasedCollator *c = NULL; | |
372 | |
373 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench()
, status); | |
374 | |
375 if (c == NULL || U_FAILURE(status)) | |
376 { | |
377 errln("Failed to create collator for Locale::getCanadaFrench()"); | |
378 delete c; | |
379 return; | |
380 } | |
381 | |
382 c->setStrength(Collator::SECONDARY); | |
383 | |
384 /* | |
385 String[] tests = { | |
386 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents
from end, p\u00e9ch\u00e9 is greater | |
387 }; | |
388 */ | |
389 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
390 { | |
391 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x6
8, 0x00E9, 0} | |
392 }; | |
393 | |
394 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
395 | |
396 delete c; | |
397 } | |
398 | |
399 // @bug 4065540 | |
400 // | |
401 // Collator::compare() method broken if either string contains spaces | |
402 // | |
403 void CollationRegressionTest::Test4065540(/* char* par */) | |
404 { | |
405 if (en_us->compare("abcd e", "abcd f") == 0) | |
406 { | |
407 errln("'abcd e' == 'abcd f'"); | |
408 } | |
409 } | |
410 | |
411 // @bug 4066189 | |
412 // | |
413 // Unicode characters need to be recursively decomposed to get the | |
414 // correct result. For example, | |
415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. | |
416 // | |
417 void CollationRegressionTest::Test4066189(/* char* par */) | |
418 { | |
419 static const UChar chars1[] = {0x1EB1, 0}; | |
420 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; | |
421 const UnicodeString test1(chars1); | |
422 const UnicodeString test2(chars2); | |
423 UErrorCode status = U_ZERO_ERROR; | |
424 | |
425 // NOTE: The java code used en_us to create the | |
426 // CollationElementIterator's. I'm pretty sure that | |
427 // was wrong, so I've change the code to use c1 and c2 | |
428 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); | |
429 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
430 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); | |
431 | |
432 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); | |
433 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
434 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); | |
435 | |
436 assertEqual(*i1, *i2); | |
437 | |
438 delete i2; | |
439 delete c2; | |
440 delete i1; | |
441 delete c1; | |
442 } | |
443 | |
444 // @bug 4066696 | |
445 // | |
446 // French secondary collation checking at the end of compare iteration fails | |
447 // | |
448 void CollationRegressionTest::Test4066696(/* char* par */) | |
449 { | |
450 UErrorCode status = U_ZERO_ERROR; | |
451 RuleBasedCollator *c = NULL; | |
452 | |
453 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(),
status); | |
454 | |
455 if (c == NULL || U_FAILURE(status)) | |
456 { | |
457 errln("Failure creating collator for Locale::getCanadaFrench()"); | |
458 delete c; | |
459 return; | |
460 } | |
461 | |
462 c->setStrength(Collator::SECONDARY); | |
463 | |
464 /* | |
465 String[] tests = { | |
466 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute | |
467 }; | |
468 | |
469 should be: | |
470 | |
471 String[] tests = { | |
472 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute | |
473 }; | |
474 | |
475 */ | |
476 | |
477 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
478 { | |
479 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} | |
480 }; | |
481 | |
482 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
483 | |
484 delete c; | |
485 } | |
486 | |
487 // @bug 4076676 | |
488 // | |
489 // Bad canonicalization of same-class combining characters | |
490 // | |
491 void CollationRegressionTest::Test4076676(/* char* par */) | |
492 { | |
493 // These combining characters are all in the same class, so they should not | |
494 // be reordered, and they should compare as unequal. | |
495 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; | |
496 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; | |
497 | |
498 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
499 c->setStrength(Collator::TERTIARY); | |
500 | |
501 if (c->compare(s1,s2) == 0) | |
502 { | |
503 errln("Same-class combining chars were reordered"); | |
504 } | |
505 | |
506 delete c; | |
507 } | |
508 | |
509 // @bug 4079231 | |
510 // | |
511 // RuleBasedCollator::operator==(NULL) throws NullPointerException | |
512 // | |
513 void CollationRegressionTest::Test4079231(/* char* par */) | |
514 { | |
515 // I don't think there's any way to write this test | |
516 // in C++. The following is equivalent to the Java, | |
517 // but doesn't compile 'cause NULL can't be converted | |
518 // to Collator& | |
519 // | |
520 // if (en_us->operator==(NULL)) | |
521 // { | |
522 // errln("en_us->operator==(NULL) returned TRUE"); | |
523 // } | |
524 | |
525 /* | |
526 try { | |
527 if (en_us->equals(null)) { | |
528 errln("en_us->equals(null) returned true"); | |
529 } | |
530 } | |
531 catch (Exception e) { | |
532 errln("en_us->equals(null) threw " + e.toString()); | |
533 } | |
534 */ | |
535 } | |
536 | |
537 // @bug 4078588 | |
538 // | |
539 // RuleBasedCollator breaks on "< a < bb" rule | |
540 // | |
541 void CollationRegressionTest::Test4078588(/* char *par */) | |
542 { | |
543 UErrorCode status = U_ZERO_ERROR; | |
544 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); | |
545 | |
546 if (rbc == NULL || U_FAILURE(status)) | |
547 { | |
548 errln("Failed to create RuleBasedCollator."); | |
549 delete rbc; | |
550 return; | |
551 } | |
552 | |
553 Collator::EComparisonResult result = rbc->compare("a","bb"); | |
554 | |
555 if (result != Collator::LESS) | |
556 { | |
557 errln((UnicodeString)"Compare(a,bb) returned " + (int)result | |
558 + (UnicodeString)"; expected -1"); | |
559 } | |
560 | |
561 delete rbc; | |
562 } | |
563 | |
564 // @bug 4081866 | |
565 // | |
566 // Combining characters in different classes not reordered properly. | |
567 // | |
568 void CollationRegressionTest::Test4081866(/* char* par */) | |
569 { | |
570 // These combining characters are all in different classes, | |
571 // so they should be reordered and the strings should compare as equal. | |
572 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; | |
573 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; | |
574 | |
575 UErrorCode status = U_ZERO_ERROR; | |
576 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
577 c->setStrength(Collator::TERTIARY); | |
578 | |
579 // Now that the default collators are set to NO_DECOMPOSITION | |
580 // (as a result of fixing bug 4114077), we must set it explicitly | |
581 // when we're testing reordering behavior. -- lwerner, 5/5/98 | |
582 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
583 | |
584 if (c->compare(s1,s2) != 0) | |
585 { | |
586 errln("Combining chars were not reordered"); | |
587 } | |
588 | |
589 delete c; | |
590 } | |
591 | |
592 // @bug 4087241 | |
593 // | |
594 // string comparison errors in Scandinavian collators | |
595 // | |
596 void CollationRegressionTest::Test4087241(/* char* par */) | |
597 { | |
598 UErrorCode status = U_ZERO_ERROR; | |
599 Locale da_DK("da", "DK"); | |
600 RuleBasedCollator *c = NULL; | |
601 | |
602 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); | |
603 | |
604 if (c == NULL || U_FAILURE(status)) | |
605 { | |
606 errln("Failed to create collator for da_DK locale"); | |
607 delete c; | |
608 return; | |
609 } | |
610 | |
611 c->setStrength(Collator::SECONDARY); | |
612 | |
613 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
614 { | |
615 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae | |
616 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-r
ing | |
617 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-u
mlaut | |
618 }; | |
619 | |
620 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
621 | |
622 delete c; | |
623 } | |
624 | |
625 // @bug 4087243 | |
626 // | |
627 // CollationKey takes ignorable strings into account when it shouldn't | |
628 // | |
629 void CollationRegressionTest::Test4087243(/* char* par */) | |
630 { | |
631 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
632 c->setStrength(Collator::TERTIARY); | |
633 | |
634 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
635 { | |
636 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1
2 3 = 1 2 3 ctrl-A | |
637 }; | |
638 | |
639 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
640 | |
641 delete c; | |
642 } | |
643 | |
644 // @bug 4092260 | |
645 // | |
646 // Mu/micro conflict | |
647 // Micro symbol and greek lowercase letter Mu should sort identically | |
648 // | |
649 void CollationRegressionTest::Test4092260(/* char* par */) | |
650 { | |
651 UErrorCode status = U_ZERO_ERROR; | |
652 Locale el("el", ""); | |
653 Collator *c = NULL; | |
654 | |
655 c = Collator::createInstance(el, status); | |
656 | |
657 if (c == NULL || U_FAILURE(status)) | |
658 { | |
659 errln("Failed to create collator for el locale."); | |
660 delete c; | |
661 return; | |
662 } | |
663 | |
664 // These now have tertiary differences in UCA | |
665 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
666 | |
667 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
668 { | |
669 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} | |
670 }; | |
671 | |
672 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
673 | |
674 delete c; | |
675 } | |
676 | |
677 // @bug 4095316 | |
678 // | |
679 void CollationRegressionTest::Test4095316(/* char* par */) | |
680 { | |
681 UErrorCode status = U_ZERO_ERROR; | |
682 Locale el_GR("el", "GR"); | |
683 Collator *c = Collator::createInstance(el_GR, status); | |
684 | |
685 if (c == NULL || U_FAILURE(status)) | |
686 { | |
687 errln("Failed to create collator for el_GR locale"); | |
688 delete c; | |
689 return; | |
690 } | |
691 // These now have tertiary differences in UCA | |
692 //c->setStrength(Collator::TERTIARY); | |
693 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
694 | |
695 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
696 { | |
697 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} | |
698 }; | |
699 | |
700 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
701 | |
702 delete c; | |
703 } | |
704 | |
705 // @bug 4101940 | |
706 // | |
707 void CollationRegressionTest::Test4101940(/* char* par */) | |
708 { | |
709 UErrorCode status = U_ZERO_ERROR; | |
710 RuleBasedCollator *c = NULL; | |
711 UnicodeString rules = "&9 < a < b"; | |
712 UnicodeString nothing = ""; | |
713 | |
714 c = new RuleBasedCollator(rules, status); | |
715 | |
716 if (c == NULL || U_FAILURE(status)) | |
717 { | |
718 errln("Failed to create RuleBasedCollator"); | |
719 delete c; | |
720 return; | |
721 } | |
722 | |
723 CollationElementIterator *i = c->createCollationElementIterator(nothing); | |
724 i->reset(); | |
725 | |
726 if (i->next(status) != CollationElementIterator::NULLORDER) | |
727 { | |
728 errln("next did not return NULLORDER"); | |
729 } | |
730 | |
731 delete i; | |
732 delete c; | |
733 } | |
734 | |
735 // @bug 4103436 | |
736 // | |
737 // Collator::compare not handling spaces properly | |
738 // | |
739 void CollationRegressionTest::Test4103436(/* char* par */) | |
740 { | |
741 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
742 c->setStrength(Collator::TERTIARY); | |
743 | |
744 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
745 { | |
746 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0
x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, | |
747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0
x63, 0x63, 0x65, 0x73, 0x73, 0} | |
748 }; | |
749 | |
750 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
751 | |
752 delete c; | |
753 } | |
754 | |
755 // @bug 4114076 | |
756 // | |
757 // Collation not Unicode conformant with Hangul syllables | |
758 // | |
759 void CollationRegressionTest::Test4114076(/* char* par */) | |
760 { | |
761 UErrorCode status = U_ZERO_ERROR; | |
762 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
763 c->setStrength(Collator::TERTIARY); | |
764 | |
765 // | |
766 // With Canonical decomposition, Hangul syllables should get decomposed | |
767 // into Jamo, but Jamo characters should not be decomposed into | |
768 // conjoining Jamo | |
769 // | |
770 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
771 { | |
772 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} | |
773 }; | |
774 | |
775 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
776 compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
777 | |
778 // From UTR #15: | |
779 // *In earlier versions of Unicode, jamo characters like ksf | |
780 // had compatibility mappings to kf + sf. These mappings were | |
781 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.
) | |
782 // That is, the following test is obsolete as of 2.1.9 | |
783 | |
784 //obsolete- // With Full decomposition, it should go all the way down to | |
785 //obsolete- // conjoining Jamo characters. | |
786 //obsolete- // | |
787 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN
] = | |
788 //obsolete- { | |
789 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11
c2, 0} | |
790 //obsolete- }; | |
791 //obsolete- | |
792 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); | |
793 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
794 | |
795 delete c; | |
796 } | |
797 | |
798 | |
799 // @bug 4124632 | |
800 // | |
801 // Collator::getCollationKey was hanging on certain character sequences | |
802 // | |
803 void CollationRegressionTest::Test4124632(/* char* par */) | |
804 { | |
805 UErrorCode status = U_ZERO_ERROR; | |
806 Collator *coll = NULL; | |
807 | |
808 coll = Collator::createInstance(Locale::getJapan(), status); | |
809 | |
810 if (coll == NULL || U_FAILURE(status)) | |
811 { | |
812 errln("Failed to create collator for Locale::JAPAN"); | |
813 delete coll; | |
814 return; | |
815 } | |
816 | |
817 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; | |
818 CollationKey key; | |
819 | |
820 coll->getCollationKey(test, key, status); | |
821 | |
822 if (key.isBogus() || U_FAILURE(status)) | |
823 { | |
824 errln("CollationKey creation failed."); | |
825 } | |
826 | |
827 delete coll; | |
828 } | |
829 | |
830 // @bug 4132736 | |
831 // | |
832 // sort order of french words with multiple accents has errors | |
833 // | |
834 void CollationRegressionTest::Test4132736(/* char* par */) | |
835 { | |
836 UErrorCode status = U_ZERO_ERROR; | |
837 | |
838 Collator *c = NULL; | |
839 | |
840 c = Collator::createInstance(Locale::getCanadaFrench(), status); | |
841 c->setStrength(Collator::TERTIARY); | |
842 | |
843 if (c == NULL || U_FAILURE(status)) | |
844 { | |
845 errln("Failed to create a collator for Locale::getCanadaFrench()"); | |
846 delete c; | |
847 return; | |
848 } | |
849 | |
850 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
851 { | |
852 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300,
0}, | |
853 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} | |
854 }; | |
855 | |
856 compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
857 | |
858 delete c; | |
859 } | |
860 | |
861 // @bug 4133509 | |
862 // | |
863 // The sorting using java.text.CollationKey is not in the exact order | |
864 // | |
865 void CollationRegressionTest::Test4133509(/* char* par */) | |
866 { | |
867 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
868 { | |
869 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0
x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x6
9, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72,
0}, | |
870 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x
47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f
, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, | |
871 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x
53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} | |
872 }; | |
873 | |
874 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); | |
875 } | |
876 | |
877 // @bug 4114077 | |
878 // | |
879 // Collation with decomposition off doesn't work for Europe | |
880 // | |
881 void CollationRegressionTest::Test4114077(/* char* par */) | |
882 { | |
883 // Ensure that we get the same results with decomposition off | |
884 // as we do with it on.... | |
885 | |
886 UErrorCode status = U_ZERO_ERROR; | |
887 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
888 c->setStrength(Collator::TERTIARY); | |
889 | |
890 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
891 { | |
892 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0},
// Should be equivalent | |
893 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x6
8, 0x00e9, 0}, | |
894 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, | |
895 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},
// a-ring-acute -> a-ring, acute | |
896 // -> a, ring, acute | |
897 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}
// No reordering --> unequal | |
898 }; | |
899 | |
900 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
901 compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
902 | |
903 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
904 { | |
905 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} //
Reordering --> equal | |
906 }; | |
907 | |
908 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
909 compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
910 | |
911 delete c; | |
912 } | |
913 | |
914 // @bug 4141640 | |
915 // | |
916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) | |
917 // | |
918 void CollationRegressionTest::Test4141640(/* char* par */) | |
919 { | |
920 // | |
921 // Rather than just creating a Swedish collator, we might as well | |
922 // try to instantiate one for every locale available on the system | |
923 // in order to prevent this sort of bug from cropping up in the future | |
924 // | |
925 UErrorCode status = U_ZERO_ERROR; | |
926 int32_t i, localeCount; | |
927 const Locale *locales = Locale::getAvailableLocales(localeCount); | |
928 | |
929 for (i = 0; i < localeCount; i += 1) | |
930 { | |
931 Collator *c = NULL; | |
932 | |
933 status = U_ZERO_ERROR; | |
934 c = Collator::createInstance(locales[i], status); | |
935 | |
936 if (c == NULL || U_FAILURE(status)) | |
937 { | |
938 UnicodeString msg, localeName; | |
939 | |
940 msg += "Could not create collator for locale "; | |
941 msg += locales[i].getName(); | |
942 | |
943 errln(msg); | |
944 } | |
945 | |
946 delete c; | |
947 } | |
948 } | |
949 | |
950 // @bug 4139572 | |
951 // | |
952 // getCollationKey throws exception for spanish text | |
953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 | |
954 // | |
955 void CollationRegressionTest::Test4139572(/* char* par */) | |
956 { | |
957 // | |
958 // Code pasted straight from the bug report | |
959 // (and then translated to C++ ;-) | |
960 // | |
961 // create spanish locale and collator | |
962 UErrorCode status = U_ZERO_ERROR; | |
963 Locale l("es", "es"); | |
964 Collator *col = NULL; | |
965 | |
966 col = Collator::createInstance(l, status); | |
967 | |
968 if (col == NULL || U_FAILURE(status)) | |
969 { | |
970 errln("Failed to create a collator for es_es locale."); | |
971 delete col; | |
972 return; | |
973 } | |
974 | |
975 CollationKey key; | |
976 | |
977 // this spanish phrase kills it! | |
978 col->getCollationKey("Nombre De Objeto", key, status); | |
979 | |
980 if (key.isBogus() || U_FAILURE(status)) | |
981 { | |
982 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); | |
983 } | |
984 | |
985 delete col; | |
986 } | |
987 /* HSYS : RuleBasedCollator::compare() performance enhancements | |
988 compare() does not create CollationElementIterator() anymore.*/ | |
989 | |
990 class My4146160Collator : public RuleBasedCollator | |
991 { | |
992 public: | |
993 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); | |
994 ~My4146160Collator(); | |
995 | |
996 CollationElementIterator *createCollationElementIterator(const UnicodeString
&text) const; | |
997 | |
998 CollationElementIterator *createCollationElementIterator(const CharacterIter
ator &text) const; | |
999 | |
1000 static int32_t count; | |
1001 }; | |
1002 | |
1003 int32_t My4146160Collator::count = 0; | |
1004 | |
1005 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) | |
1006 : RuleBasedCollator(rbc.getRules(), status) | |
1007 { | |
1008 } | |
1009 | |
1010 My4146160Collator::~My4146160Collator() | |
1011 { | |
1012 } | |
1013 | |
1014 CollationElementIterator *My4146160Collator::createCollationElementIterator(cons
t UnicodeString &text) const | |
1015 { | |
1016 count += 1; | |
1017 return RuleBasedCollator::createCollationElementIterator(text); | |
1018 } | |
1019 | |
1020 CollationElementIterator *My4146160Collator::createCollationElementIterator(cons
t CharacterIterator &text) const | |
1021 { | |
1022 count += 1; | |
1023 return RuleBasedCollator::createCollationElementIterator(text); | |
1024 } | |
1025 | |
1026 // @bug 4146160 | |
1027 // | |
1028 // RuleBasedCollator doesn't use createCollationElementIterator internally | |
1029 // | |
1030 void CollationRegressionTest::Test4146160(/* char* par */) | |
1031 { | |
1032 #if 0 | |
1033 // | |
1034 // Use a custom collator class whose createCollationElementIterator | |
1035 // methods increment a count.... | |
1036 // | |
1037 UErrorCode status = U_ZERO_ERROR; | |
1038 CollationKey key; | |
1039 | |
1040 My4146160Collator::count = 0; | |
1041 My4146160Collator *mc = NULL; | |
1042 | |
1043 mc = new My4146160Collator(*en_us, status); | |
1044 | |
1045 if (mc == NULL || U_FAILURE(status)) | |
1046 { | |
1047 errln("Failed to create a My4146160Collator."); | |
1048 delete mc; | |
1049 return; | |
1050 } | |
1051 | |
1052 mc->getCollationKey("1", key, status); | |
1053 | |
1054 if (key.isBogus() || U_FAILURE(status)) | |
1055 { | |
1056 errln("Failure to get a CollationKey from a My4146160Collator."); | |
1057 delete mc; | |
1058 return; | |
1059 } | |
1060 | |
1061 if (My4146160Collator::count < 1) | |
1062 { | |
1063 errln("My4146160Collator::createCollationElementIterator not called for
getCollationKey"); | |
1064 } | |
1065 | |
1066 My4146160Collator::count = 0; | |
1067 mc->compare("1", "2"); | |
1068 | |
1069 if (My4146160Collator::count < 1) | |
1070 { | |
1071 errln("My4146160Collator::createtCollationElementIterator not called for
compare"); | |
1072 } | |
1073 | |
1074 delete mc; | |
1075 #endif | |
1076 } | |
1077 | |
1078 void CollationRegressionTest::Test4179216() { | |
1079 // you can position a CollationElementIterator in the middle of | |
1080 // a contracting character sequence, yielding a bogus collation | |
1081 // element | |
1082 IcuTestErrorCode errorCode(*this, "Test4179216"); | |
1083 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat <
crunchy", errorCode); | |
1084 UnicodeString testText = "church church catcatcher runcrunchynchy"; | |
1085 CollationElementIterator *iter = coll.createCollationElementIterator(testTex
t); | |
1086 | |
1087 // test that the "ch" combination works properly | |
1088 iter->setOffset(4, errorCode); | |
1089 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode))
; | |
1090 | |
1091 iter->reset(); | |
1092 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode))
; | |
1093 | |
1094 iter->setOffset(5, errorCode); | |
1095 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode))
; | |
1096 | |
1097 // Compares and prints only 16-bit primary weights. | |
1098 if (elt4 != elt0 || elt5 != elt0) { | |
1099 errln("The collation elements at positions 0 (0x%04x), " | |
1100 "4 (0x%04x), and 5 (0x%04x) don't match.", | |
1101 elt0, elt4, elt5); | |
1102 } | |
1103 | |
1104 // test that the "cat" combination works properly | |
1105 iter->setOffset(14, errorCode); | |
1106 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
1107 | |
1108 iter->setOffset(15, errorCode); | |
1109 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
1110 | |
1111 iter->setOffset(16, errorCode); | |
1112 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
1113 | |
1114 iter->setOffset(17, errorCode); | |
1115 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
1116 | |
1117 iter->setOffset(18, errorCode); | |
1118 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
1119 | |
1120 iter->setOffset(19, errorCode); | |
1121 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
1122 | |
1123 // Compares and prints only 16-bit primary weights. | |
1124 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 | |
1125 || elt14 != elt18 || elt14 != elt19) { | |
1126 errln("\"cat\" elements don't match: elt14 = 0x%04x, " | |
1127 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " | |
1128 "elt18 = 0x%04x, elt19 = 0x%04x", | |
1129 elt14, elt15, elt16, elt17, elt18, elt19); | |
1130 } | |
1131 | |
1132 // now generate a complete list of the collation elements, | |
1133 // first using next() and then using setOffset(), and | |
1134 // make sure both interfaces return the same set of elements | |
1135 iter->reset(); | |
1136 | |
1137 int32_t elt = iter->next(errorCode); | |
1138 int32_t count = 0; | |
1139 while (elt != CollationElementIterator::NULLORDER) { | |
1140 ++count; | |
1141 elt = iter->next(errorCode); | |
1142 } | |
1143 | |
1144 LocalArray<UnicodeString> nextElements(new UnicodeString[count]); | |
1145 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); | |
1146 int32_t lastPos = 0; | |
1147 | |
1148 iter->reset(); | |
1149 elt = iter->next(errorCode); | |
1150 count = 0; | |
1151 while (elt != CollationElementIterator::NULLORDER) { | |
1152 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->get
Offset()); | |
1153 lastPos = iter->getOffset(); | |
1154 elt = iter->next(errorCode); | |
1155 } | |
1156 int32_t nextElementsLength = count; | |
1157 count = 0; | |
1158 for (int32_t i = 0; i < testText.length(); ) { | |
1159 iter->setOffset(i, errorCode); | |
1160 lastPos = iter->getOffset(); | |
1161 elt = iter->next(errorCode); | |
1162 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter
->getOffset()); | |
1163 i = iter->getOffset(); | |
1164 } | |
1165 for (int32_t i = 0; i < nextElementsLength; i++) { | |
1166 if (nextElements[i] == setOffsetElements[i]) { | |
1167 logln(nextElements[i]); | |
1168 } else { | |
1169 errln(UnicodeString("Error: next() yielded ") + nextElements[i] + | |
1170 ", but setOffset() yielded " + setOffsetElements[i]); | |
1171 } | |
1172 } | |
1173 delete iter; | |
1174 } | |
1175 | |
1176 // Ticket 7189 | |
1177 // | |
1178 // nextSortKeyPart incorrect for EO_S1 collation | |
1179 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t le
n, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { | |
1180 UCharIterator uiter; | |
1181 uint32_t state[2] = { 0, 0 }; | |
1182 int32_t keyLen; | |
1183 int32_t count = 8; | |
1184 | |
1185 uiter_setString(&uiter, text, len); | |
1186 keyLen = 0; | |
1187 while (TRUE) { | |
1188 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[k
eyLen], count, &status); | |
1189 if (U_FAILURE(status)) { | |
1190 return -1; | |
1191 } | |
1192 if (keyPartLen == 0) { | |
1193 break; | |
1194 } | |
1195 keyLen += keyPartLen; | |
1196 } | |
1197 return keyLen; | |
1198 } | |
1199 | |
1200 void CollationRegressionTest::TestT7189() { | |
1201 UErrorCode status = U_ZERO_ERROR; | |
1202 UCollator *coll; | |
1203 uint32_t i; | |
1204 | |
1205 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1206 // "Achter De Hoven" | |
1207 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F
, 0x76, 0x65, 0x6E, 0x00 }, | |
1208 // "ABC" | |
1209 { 0x41, 0x42, 0x43, 0x00 }, | |
1210 // "HELLO world!" | |
1211 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21
, 0x00 } | |
1212 }; | |
1213 | |
1214 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1215 // "Achter de Hoven" | |
1216 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F
, 0x76, 0x65, 0x6E, 0x00 }, | |
1217 // "abc" | |
1218 { 0x61, 0x62, 0x63, 0x00 }, | |
1219 // "hello world!" | |
1220 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21
, 0x00 } | |
1221 }; | |
1222 | |
1223 // Open the collator | |
1224 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); | |
1225 if (U_FAILURE(status)) { | |
1226 errln("Failed to create a collator for short string EO_S1"); | |
1227 return; | |
1228 } | |
1229 | |
1230 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * si
zeof(UChar)); i++) { | |
1231 uint8_t key1[100], key2[100]; | |
1232 int32_t len1, len2; | |
1233 | |
1234 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status
); | |
1235 if (U_FAILURE(status)) { | |
1236 errln(UnicodeString("Failed to get a partial collation key for ") +
text1[i]); | |
1237 break; | |
1238 } | |
1239 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status
); | |
1240 if (U_FAILURE(status)) { | |
1241 errln(UnicodeString("Failed to get a partial collation key for ") +
text2[i]); | |
1242 break; | |
1243 } | |
1244 | |
1245 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { | |
1246 errln(UnicodeString("Failed: Identical key\n") + " text1: " + tex
t1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::he
x(key1, len1)); | |
1247 } else { | |
1248 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i]
+ "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + t
ext2[i] + "\n" + " key2 : " | |
1249 + TestUtility::hex(key2, len2)); | |
1250 } | |
1251 } | |
1252 ucol_close(coll); | |
1253 } | |
1254 | |
1255 void CollationRegressionTest::TestCaseFirstCompression() { | |
1256 RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); | |
1257 UErrorCode status = U_ZERO_ERROR; | |
1258 | |
1259 // default | |
1260 caseFirstCompressionSub(col, "default"); | |
1261 | |
1262 // Upper first | |
1263 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); | |
1264 if (U_FAILURE(status)) { | |
1265 errln("Failed to set UCOL_UPPER_FIRST"); | |
1266 return; | |
1267 } | |
1268 caseFirstCompressionSub(col, "upper first"); | |
1269 | |
1270 // Lower first | |
1271 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); | |
1272 if (U_FAILURE(status)) { | |
1273 errln("Failed to set UCOL_LOWER_FIRST"); | |
1274 return; | |
1275 } | |
1276 caseFirstCompressionSub(col, "lower first"); | |
1277 | |
1278 delete col; | |
1279 } | |
1280 | |
1281 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeStri
ng opt) { | |
1282 const int32_t maxLength = 50; | |
1283 | |
1284 UChar str1[maxLength]; | |
1285 UChar str2[maxLength]; | |
1286 | |
1287 CollationKey key1, key2; | |
1288 | |
1289 for (int32_t len = 1; len <= maxLength; len++) { | |
1290 int32_t i = 0; | |
1291 for (; i < len - 1; i++) { | |
1292 str1[i] = str2[i] = (UChar)0x61; // 'a' | |
1293 } | |
1294 str1[i] = (UChar)0x41; // 'A' | |
1295 str2[i] = (UChar)0x61; // 'a' | |
1296 | |
1297 UErrorCode status = U_ZERO_ERROR; | |
1298 col->getCollationKey(str1, len, key1, status); | |
1299 col->getCollationKey(str2, len, key2, status); | |
1300 | |
1301 UCollationResult cmpKey = key1.compareTo(key2, status); | |
1302 UCollationResult cmpCol = col->compare(str1, len, str2, len, status); | |
1303 | |
1304 if (U_FAILURE(status)) { | |
1305 errln("Error in caseFirstCompressionSub"); | |
1306 } else if (cmpKey != cmpCol) { | |
1307 errln((UnicodeString)"Inconsistent comparison(" + opt | |
1308 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeStr
ing(str2, len) | |
1309 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); | |
1310 } | |
1311 } | |
1312 } | |
1313 | |
1314 void CollationRegressionTest::TestTrailingComment() { | |
1315 // ICU ticket #8070: | |
1316 // Check that the rule parser handles a comment without terminating end-of-l
ine. | |
1317 IcuTestErrorCode errorCode(*this, "TestTrailingComment"); | |
1318 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"),
errorCode); | |
1319 UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63); | |
1320 assertTrue("c<b", coll.compare(c, b) < 0); | |
1321 assertTrue("b<a", coll.compare(b, a) < 0); | |
1322 } | |
1323 | |
1324 void CollationRegressionTest::TestBeforeWithTooStrongAfter() { | |
1325 // ICU ticket #9959: | |
1326 // Forbid rules with a before-reset followed by a stronger relation. | |
1327 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter"); | |
1328 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorC
ode); | |
1329 if(errorCode.isSuccess()) { | |
1330 errln("should forbid before-2-reset followed by primary relation"); | |
1331 } else { | |
1332 errorCode.reset(); | |
1333 } | |
1334 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), er
rorCode); | |
1335 if(errorCode.isSuccess()) { | |
1336 errln("should forbid before-3-reset followed by primary or secondary rel
ation"); | |
1337 } else { | |
1338 errorCode.reset(); | |
1339 } | |
1340 } | |
1341 | |
1342 void CollationRegressionTest::compareArray(Collator &c, | |
1343 const UChar tests[][CollationRegressi
onTest::MAX_TOKEN_LEN], | |
1344 int32_t testCount) | |
1345 { | |
1346 int32_t i; | |
1347 Collator::EComparisonResult expectedResult = Collator::EQUAL; | |
1348 | |
1349 for (i = 0; i < testCount; i += 3) | |
1350 { | |
1351 UnicodeString source(tests[i]); | |
1352 UnicodeString comparison(tests[i + 1]); | |
1353 UnicodeString target(tests[i + 2]); | |
1354 | |
1355 if (comparison == "<") | |
1356 { | |
1357 expectedResult = Collator::LESS; | |
1358 } | |
1359 else if (comparison == ">") | |
1360 { | |
1361 expectedResult = Collator::GREATER; | |
1362 } | |
1363 else if (comparison == "=") | |
1364 { | |
1365 expectedResult = Collator::EQUAL; | |
1366 } | |
1367 else | |
1368 { | |
1369 UnicodeString bogus1("Bogus comparison string \""); | |
1370 UnicodeString bogus2("\""); | |
1371 errln(bogus1 + comparison + bogus2); | |
1372 } | |
1373 | |
1374 Collator::EComparisonResult compareResult = c.compare(source, target); | |
1375 | |
1376 CollationKey sourceKey, targetKey; | |
1377 UErrorCode status = U_ZERO_ERROR; | |
1378 | |
1379 c.getCollationKey(source, sourceKey, status); | |
1380 | |
1381 if (U_FAILURE(status)) | |
1382 { | |
1383 errln("Couldn't get collationKey for source"); | |
1384 continue; | |
1385 } | |
1386 | |
1387 c.getCollationKey(target, targetKey, status); | |
1388 | |
1389 if (U_FAILURE(status)) | |
1390 { | |
1391 errln("Couldn't get collationKey for target"); | |
1392 continue; | |
1393 } | |
1394 | |
1395 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); | |
1396 | |
1397 reportCResult( source, target, sourceKey, targetKey, compareResult, keyR
esult, compareResult, expectedResult ); | |
1398 | |
1399 } | |
1400 } | |
1401 | |
1402 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, Collatio
nElementIterator &i2) | |
1403 { | |
1404 int32_t c1, c2, count = 0; | |
1405 UErrorCode status = U_ZERO_ERROR; | |
1406 | |
1407 do | |
1408 { | |
1409 c1 = i1.next(status); | |
1410 c2 = i2.next(status); | |
1411 | |
1412 if (c1 != c2) | |
1413 { | |
1414 UnicodeString msg, msg1(" "); | |
1415 | |
1416 msg += msg1 + count; | |
1417 msg += ": strength(0x"; | |
1418 appendHex(c1, 8, msg); | |
1419 msg += ") != strength(0x"; | |
1420 appendHex(c2, 8, msg); | |
1421 msg += ")"; | |
1422 | |
1423 errln(msg); | |
1424 break; | |
1425 } | |
1426 | |
1427 count += 1; | |
1428 } | |
1429 while (c1 != CollationElementIterator::NULLORDER); | |
1430 } | |
1431 | |
1432 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
ar* &name, char* /* par */) | |
1433 { | |
1434 if (exec) | |
1435 { | |
1436 logln("Collation Regression Tests: "); | |
1437 } | |
1438 | |
1439 if(en_us == NULL) { | |
1440 dataerrln("Class collator not instantiated"); | |
1441 name = ""; | |
1442 return; | |
1443 } | |
1444 TESTCASE_AUTO_BEGIN; | |
1445 TESTCASE_AUTO(Test4048446); | |
1446 TESTCASE_AUTO(Test4051866); | |
1447 TESTCASE_AUTO(Test4053636); | |
1448 TESTCASE_AUTO(Test4054238); | |
1449 TESTCASE_AUTO(Test4054734); | |
1450 TESTCASE_AUTO(Test4054736); | |
1451 TESTCASE_AUTO(Test4058613); | |
1452 TESTCASE_AUTO(Test4059820); | |
1453 TESTCASE_AUTO(Test4060154); | |
1454 TESTCASE_AUTO(Test4062418); | |
1455 TESTCASE_AUTO(Test4065540); | |
1456 TESTCASE_AUTO(Test4066189); | |
1457 TESTCASE_AUTO(Test4066696); | |
1458 TESTCASE_AUTO(Test4076676); | |
1459 TESTCASE_AUTO(Test4078588); | |
1460 TESTCASE_AUTO(Test4079231); | |
1461 TESTCASE_AUTO(Test4081866); | |
1462 TESTCASE_AUTO(Test4087241); | |
1463 TESTCASE_AUTO(Test4087243); | |
1464 TESTCASE_AUTO(Test4092260); | |
1465 TESTCASE_AUTO(Test4095316); | |
1466 TESTCASE_AUTO(Test4101940); | |
1467 TESTCASE_AUTO(Test4103436); | |
1468 TESTCASE_AUTO(Test4114076); | |
1469 TESTCASE_AUTO(Test4114077); | |
1470 TESTCASE_AUTO(Test4124632); | |
1471 TESTCASE_AUTO(Test4132736); | |
1472 TESTCASE_AUTO(Test4133509); | |
1473 TESTCASE_AUTO(Test4139572); | |
1474 TESTCASE_AUTO(Test4141640); | |
1475 TESTCASE_AUTO(Test4146160); | |
1476 TESTCASE_AUTO(Test4179216); | |
1477 TESTCASE_AUTO(TestT7189); | |
1478 TESTCASE_AUTO(TestCaseFirstCompression); | |
1479 TESTCASE_AUTO(TestTrailingComment); | |
1480 TESTCASE_AUTO(TestBeforeWithTooStrongAfter); | |
1481 TESTCASE_AUTO_END; | |
1482 } | |
1483 | |
1484 #endif /* #if !UCONFIG_NO_COLLATION */ | |
OLD | NEW |