OLD | NEW |
| (Empty) |
1 /* | |
2 ********************************************************************** | |
3 * Copyright (C) 1999-2014, International Business Machines | |
4 * Corporation and others. All Rights Reserved. | |
5 ********************************************************************** | |
6 * Date Name Description | |
7 * 11/10/99 aliu Creation. | |
8 ********************************************************************** | |
9 */ | |
10 | |
11 #include "unicode/utypes.h" | |
12 | |
13 #if !UCONFIG_NO_TRANSLITERATION | |
14 | |
15 #include "transtst.h" | |
16 #include "unicode/locid.h" | |
17 #include "unicode/dtfmtsym.h" | |
18 #include "unicode/normlzr.h" | |
19 #include "unicode/translit.h" | |
20 #include "unicode/uchar.h" | |
21 #include "unicode/unifilt.h" | |
22 #include "unicode/uniset.h" | |
23 #include "unicode/ustring.h" | |
24 #include "unicode/usetiter.h" | |
25 #include "unicode/uscript.h" | |
26 #include "unicode/utf16.h" | |
27 #include "cpdtrans.h" | |
28 #include "nultrans.h" | |
29 #include "rbt.h" | |
30 #include "rbt_pars.h" | |
31 #include "anytrans.h" | |
32 #include "esctrn.h" | |
33 #include "name2uni.h" | |
34 #include "nortrans.h" | |
35 #include "remtrans.h" | |
36 #include "titletrn.h" | |
37 #include "tolowtrn.h" | |
38 #include "toupptrn.h" | |
39 #include "unesctrn.h" | |
40 #include "uni2name.h" | |
41 #include "cstring.h" | |
42 #include "cmemory.h" | |
43 #include <stdio.h> | |
44 | |
45 /*********************************************************************** | |
46 | |
47 HOW TO USE THIS TEST FILE | |
48 -or- | |
49 How I developed on two platforms | |
50 without losing (too much of) my mind | |
51 | |
52 | |
53 1. Add new tests by copying/pasting/changing existing tests. On Java, | |
54 any public void method named Test...() taking no parameters becomes | |
55 a test. On C++, you need to modify the header and add a line to | |
56 the runIndexedTest() dispatch method. | |
57 | |
58 2. Make liberal use of the expect() method; it is your friend. | |
59 | |
60 3. The tests in this file exactly match those in a sister file on the | |
61 other side. The two files are: | |
62 | |
63 icu4j: src/com/ibm/test/translit/TransliteratorTest.java | |
64 icu4c: source/test/intltest/transtst.cpp | |
65 | |
66 ==> THIS IS THE IMPORTANT PART <== | |
67 | |
68 When you add a test in this file, add it in TransliteratorTest.java | |
69 too. Give it the same name and put it in the same relative place. | |
70 This makes maintenance a lot simpler for any poor soul who ends up | |
71 trying to synchronize the tests between icu4j and icu4c. | |
72 | |
73 4. If you MUST enter a test that is NOT paralleled in the sister file, | |
74 then add it in the special non-mirrored section. These are | |
75 labeled | |
76 | |
77 "icu4j ONLY" | |
78 | |
79 or | |
80 | |
81 "icu4c ONLY" | |
82 | |
83 Make sure you document the reason the test is here and not there. | |
84 | |
85 | |
86 Thank you. | |
87 The Management | |
88 ***********************************************************************/ | |
89 | |
90 // Define character constants thusly to be EBCDIC-friendly | |
91 enum { | |
92 LEFT_BRACE=((UChar)0x007B), /*{*/ | |
93 PIPE =((UChar)0x007C), /*|*/ | |
94 ZERO =((UChar)0x0030), /*0*/ | |
95 UPPER_A =((UChar)0x0041) /*A*/ | |
96 }; | |
97 | |
98 TransliteratorTest::TransliteratorTest() | |
99 : DESERET_DEE((UChar32)0x10414), | |
100 DESERET_dee((UChar32)0x1043C) | |
101 { | |
102 } | |
103 | |
104 TransliteratorTest::~TransliteratorTest() {} | |
105 | |
106 void | |
107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec, | |
108 const char* &name, char* /*par*/) { | |
109 switch (index) { | |
110 TESTCASE(0,TestInstantiation); | |
111 TESTCASE(1,TestSimpleRules); | |
112 TESTCASE(2,TestRuleBasedInverse); | |
113 TESTCASE(3,TestKeyboard); | |
114 TESTCASE(4,TestKeyboard2); | |
115 TESTCASE(5,TestKeyboard3); | |
116 TESTCASE(6,TestArabic); | |
117 TESTCASE(7,TestCompoundKana); | |
118 TESTCASE(8,TestCompoundHex); | |
119 TESTCASE(9,TestFiltering); | |
120 TESTCASE(10,TestInlineSet); | |
121 TESTCASE(11,TestPatternQuoting); | |
122 TESTCASE(12,TestJ277); | |
123 TESTCASE(13,TestJ243); | |
124 TESTCASE(14,TestJ329); | |
125 TESTCASE(15,TestSegments); | |
126 TESTCASE(16,TestCursorOffset); | |
127 TESTCASE(17,TestArbitraryVariableValues); | |
128 TESTCASE(18,TestPositionHandling); | |
129 TESTCASE(19,TestHiraganaKatakana); | |
130 TESTCASE(20,TestCopyJ476); | |
131 TESTCASE(21,TestAnchors); | |
132 TESTCASE(22,TestInterIndic); | |
133 TESTCASE(23,TestFilterIDs); | |
134 TESTCASE(24,TestCaseMap); | |
135 TESTCASE(25,TestNameMap); | |
136 TESTCASE(26,TestLiberalizedID); | |
137 TESTCASE(27,TestCreateInstance); | |
138 TESTCASE(28,TestNormalizationTransliterator); | |
139 TESTCASE(29,TestCompoundRBT); | |
140 TESTCASE(30,TestCompoundFilter); | |
141 TESTCASE(31,TestRemove); | |
142 TESTCASE(32,TestToRules); | |
143 TESTCASE(33,TestContext); | |
144 TESTCASE(34,TestSupplemental); | |
145 TESTCASE(35,TestQuantifier); | |
146 TESTCASE(36,TestSTV); | |
147 TESTCASE(37,TestCompoundInverse); | |
148 TESTCASE(38,TestNFDChainRBT); | |
149 TESTCASE(39,TestNullInverse); | |
150 TESTCASE(40,TestAliasInverseID); | |
151 TESTCASE(41,TestCompoundInverseID); | |
152 TESTCASE(42,TestUndefinedVariable); | |
153 TESTCASE(43,TestEmptyContext); | |
154 TESTCASE(44,TestCompoundFilterID); | |
155 TESTCASE(45,TestPropertySet); | |
156 TESTCASE(46,TestNewEngine); | |
157 TESTCASE(47,TestQuantifiedSegment); | |
158 TESTCASE(48,TestDevanagariLatinRT); | |
159 TESTCASE(49,TestTeluguLatinRT); | |
160 TESTCASE(50,TestCompoundLatinRT); | |
161 TESTCASE(51,TestSanskritLatinRT); | |
162 TESTCASE(52,TestLocaleInstantiation); | |
163 TESTCASE(53,TestTitleAccents); | |
164 TESTCASE(54,TestLocaleResource); | |
165 TESTCASE(55,TestParseError); | |
166 TESTCASE(56,TestOutputSet); | |
167 TESTCASE(57,TestVariableRange); | |
168 TESTCASE(58,TestInvalidPostContext); | |
169 TESTCASE(59,TestIDForms); | |
170 TESTCASE(60,TestToRulesMark); | |
171 TESTCASE(61,TestEscape); | |
172 TESTCASE(62,TestAnchorMasking); | |
173 TESTCASE(63,TestDisplayName); | |
174 TESTCASE(64,TestSpecialCases); | |
175 #if !UCONFIG_NO_FILE_IO | |
176 TESTCASE(65,TestIncrementalProgress); | |
177 #endif | |
178 TESTCASE(66,TestSurrogateCasing); | |
179 TESTCASE(67,TestFunction); | |
180 TESTCASE(68,TestInvalidBackRef); | |
181 TESTCASE(69,TestMulticharStringSet); | |
182 TESTCASE(70,TestUserFunction); | |
183 TESTCASE(71,TestAnyX); | |
184 TESTCASE(72,TestSourceTargetSet); | |
185 TESTCASE(73,TestGurmukhiDevanagari); | |
186 TESTCASE(74,TestPatternWhiteSpace); | |
187 TESTCASE(75,TestAllCodepoints); | |
188 TESTCASE(76,TestBoilerplate); | |
189 TESTCASE(77,TestAlternateSyntax); | |
190 TESTCASE(78,TestBeginEnd); | |
191 TESTCASE(79,TestBeginEndToRules); | |
192 TESTCASE(80,TestRegisterAlias); | |
193 TESTCASE(81,TestRuleStripping); | |
194 TESTCASE(82,TestHalfwidthFullwidth); | |
195 TESTCASE(83,TestThai); | |
196 TESTCASE(84,TestAny); | |
197 default: name = ""; break; | |
198 } | |
199 } | |
200 | |
201 /** | |
202 * Make sure every system transliterator can be instantiated. | |
203 * | |
204 * ALSO test that the result of toRules() for each rule is a valid | |
205 * rule. Do this here so we don't have to have another test that | |
206 * instantiates everything as well. | |
207 */ | |
208 void TransliteratorTest::TestInstantiation() { | |
209 UErrorCode ec = U_ZERO_ERROR; | |
210 StringEnumeration* avail = Transliterator::getAvailableIDs(ec); | |
211 assertSuccess("getAvailableIDs()", ec); | |
212 assertTrue("getAvailableIDs()!=NULL", avail!=NULL); | |
213 int32_t n = Transliterator::countAvailableIDs(); | |
214 assertTrue("getAvailableIDs().count()==countAvailableIDs()", | |
215 avail->count(ec) == n); | |
216 assertSuccess("count()", ec); | |
217 UnicodeString name; | |
218 for (int32_t i=0; i<n; ++i) { | |
219 const UnicodeString& id = *avail->snext(ec); | |
220 if (!assertSuccess("snext()", ec) || | |
221 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) { | |
222 break; | |
223 } | |
224 UnicodeString id2 = Transliterator::getAvailableID(i); | |
225 if (id.length() < 1) { | |
226 errln(UnicodeString("FAIL: getAvailableID(") + | |
227 i + ") returned empty string"); | |
228 continue; | |
229 } | |
230 if (id != id2) { | |
231 errln(UnicodeString("FAIL: getAvailableID(") + | |
232 i + ") != getAvailableIDs().snext()"); | |
233 continue; | |
234 } | |
235 UParseError parseError; | |
236 UErrorCode status = U_ZERO_ERROR; | |
237 Transliterator* t = Transliterator::createInstance(id, | |
238 UTRANS_FORWARD, parseError,status); | |
239 name.truncate(0); | |
240 Transliterator::getDisplayName(id, name); | |
241 if (t == 0) { | |
242 #if UCONFIG_NO_BREAK_ITERATION | |
243 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. | |
244 if (id.compare((UnicodeString)"Thai-Latin") != 0) | |
245 #endif | |
246 dataerrln(UnicodeString("FAIL: Couldn't create ") + id + | |
247 /*", parse error " + parseError.code +*/ | |
248 ", line " + parseError.line + | |
249 ", offset " + parseError.offset + | |
250 ", pre-context " + prettify(parseError.preContext, TRUE) + | |
251 ", post-context " +prettify(parseError.postContext,TRUE) + | |
252 ", Error: " + u_errorName(status)); | |
253 // When createInstance fails, it deletes the failing | |
254 // entry from the available ID list. We detect this | |
255 // here by looking for a change in countAvailableIDs. | |
256 int32_t nn = Transliterator::countAvailableIDs(); | |
257 if (nn == (n - 1)) { | |
258 n = nn; | |
259 --i; // Compensate for deleted entry | |
260 } | |
261 } else { | |
262 logln(UnicodeString("OK: ") + name + " (" + id + ")"); | |
263 | |
264 // Now test toRules | |
265 UnicodeString rules; | |
266 t->toRules(rules, TRUE); | |
267 Transliterator *u = Transliterator::createFromRules("x", | |
268 rules, UTRANS_FORWARD, parseError,status); | |
269 if (u == 0) { | |
270 errln(UnicodeString("FAIL: ") + id + | |
271 ".createFromRules() => bad rules" + | |
272 /*", parse error " + parseError.code +*/ | |
273 ", line " + parseError.line + | |
274 ", offset " + parseError.offset + | |
275 ", context " + prettify(parseError.preContext, TRUE) + | |
276 ", rules: " + prettify(rules, TRUE)); | |
277 } else { | |
278 delete u; | |
279 } | |
280 delete t; | |
281 } | |
282 } | |
283 assertTrue("snext()==NULL", avail->snext(ec)==NULL); | |
284 assertSuccess("snext()", ec); | |
285 delete avail; | |
286 | |
287 // Now test the failure path | |
288 UParseError parseError; | |
289 UErrorCode status = U_ZERO_ERROR; | |
290 UnicodeString id("<Not a valid Transliterator ID>"); | |
291 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parse
Error, status); | |
292 if (t != 0) { | |
293 errln("FAIL: " + id + " returned a transliterator"); | |
294 delete t; | |
295 } else { | |
296 logln("OK: Bogus ID handled properly"); | |
297 } | |
298 } | |
299 | |
300 void TransliteratorTest::TestSimpleRules(void) { | |
301 /* Example: rules 1. ab>x|y | |
302 * 2. yc>z | |
303 * | |
304 * []|eabcd start - no match, copy e to tranlated buffer | |
305 * [e]|abcd match rule 1 - copy output & adjust cursor | |
306 * [ex|y]cd match rule 2 - copy output & adjust cursor | |
307 * [exz]|d no match, copy d to transliterated buffer | |
308 * [exzd]| done | |
309 */ | |
310 expect(UnicodeString("ab>x|y;", "") + | |
311 "yc>z", | |
312 "eabcd", "exzd"); | |
313 | |
314 /* Another set of rules: | |
315 * 1. ab>x|yzacw | |
316 * 2. za>q | |
317 * 3. qc>r | |
318 * 4. cw>n | |
319 * | |
320 * []|ab Rule 1 | |
321 * [x|yzacw] No match | |
322 * [xy|zacw] Rule 2 | |
323 * [xyq|cw] Rule 4 | |
324 * [xyqn]| Done | |
325 */ | |
326 expect(UnicodeString("ab>x|yzacw;") + | |
327 "za>q;" + | |
328 "qc>r;" + | |
329 "cw>n", | |
330 "ab", "xyqn"); | |
331 | |
332 /* Test categories | |
333 */ | |
334 UErrorCode status = U_ZERO_ERROR; | |
335 UParseError parseError; | |
336 Transliterator *t = Transliterator::createFromRules( | |
337 "<ID>", | |
338 UnicodeString("$dummy=").append((UChar)0xE100) + | |
339 UnicodeString(";" | |
340 "$vowel=[aeiouAEIOU];" | |
341 "$lu=[:Lu:];" | |
342 "$vowel } $lu > '!';" | |
343 "$vowel > '&';" | |
344 "'!' { $lu > '^';" | |
345 "$lu > '*';" | |
346 "a > ERROR", ""), | |
347 UTRANS_FORWARD, parseError, | |
348 status); | |
349 if (U_FAILURE(status)) { | |
350 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status)); | |
351 return; | |
352 } | |
353 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); | |
354 delete t; | |
355 } | |
356 | |
357 /** | |
358 * Test inline set syntax and set variable syntax. | |
359 */ | |
360 void TransliteratorTest::TestInlineSet(void) { | |
361 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); | |
362 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); | |
363 | |
364 expect(UnicodeString( | |
365 "$digit = [0-9];" | |
366 "$alpha = [a-zA-Z];" | |
367 "$alphanumeric = [$digit $alpha];" // *** | |
368 "$special = [^$alphanumeric];" // *** | |
369 "$alphanumeric > '-';" | |
370 "$special > '*';", ""), | |
371 | |
372 "thx-1138", "---*----"); | |
373 } | |
374 | |
375 /** | |
376 * Create some inverses and confirm that they work. We have to be | |
377 * careful how we do this, since the inverses will not be true | |
378 * inverses -- we can't throw any random string at the composition | |
379 * of the transliterators and expect the identity function. F x | |
380 * F' != I. However, if we are careful about the input, we will | |
381 * get the expected results. | |
382 */ | |
383 void TransliteratorTest::TestRuleBasedInverse(void) { | |
384 UnicodeString RULES = | |
385 UnicodeString("abc>zyx;") + | |
386 "ab>yz;" + | |
387 "bc>zx;" + | |
388 "ca>xy;" + | |
389 "a>x;" + | |
390 "b>y;" + | |
391 "c>z;" + | |
392 | |
393 "abc<zyx;" + | |
394 "ab<yz;" + | |
395 "bc<zx;" + | |
396 "ca<xy;" + | |
397 "a<x;" + | |
398 "b<y;" + | |
399 "c<z;" + | |
400 | |
401 ""; | |
402 | |
403 const char* DATA[] = { | |
404 // Careful here -- random strings will not work. If we keep | |
405 // the left side to the domain and the right side to the range | |
406 // we will be okay though (left, abc; right xyz). | |
407 "a", "x", | |
408 "abcacab", "zyxxxyy", | |
409 "caccb", "xyzzy", | |
410 }; | |
411 | |
412 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); | |
413 | |
414 UErrorCode status = U_ZERO_ERROR; | |
415 UParseError parseError; | |
416 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES, | |
417 UTRANS_FORWARD, parseError, status); | |
418 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES, | |
419 UTRANS_REVERSE, parseError, status); | |
420 if (U_FAILURE(status)) { | |
421 errln("FAIL: RBT constructor failed"); | |
422 return; | |
423 } | |
424 for (int32_t i=0; i<DATA_length; i+=2) { | |
425 expect(*fwd, DATA[i], DATA[i+1]); | |
426 expect(*rev, DATA[i+1], DATA[i]); | |
427 } | |
428 delete fwd; | |
429 delete rev; | |
430 } | |
431 | |
432 /** | |
433 * Basic test of keyboard. | |
434 */ | |
435 void TransliteratorTest::TestKeyboard(void) { | |
436 UParseError parseError; | |
437 UErrorCode status = U_ZERO_ERROR; | |
438 Transliterator *t = Transliterator::createFromRules("<ID>", | |
439 UnicodeString("psch>Y;") | |
440 +"ps>y;" | |
441 +"ch>x;" | |
442 +"a>A;", | |
443 UTRANS_FORWARD, parseError, | |
444 status); | |
445 if (U_FAILURE(status)) { | |
446 errln("FAIL: RBT constructor failed"); | |
447 return; | |
448 } | |
449 const char* DATA[] = { | |
450 // insertion, buffer | |
451 "a", "A", | |
452 "p", "Ap", | |
453 "s", "Aps", | |
454 "c", "Apsc", | |
455 "a", "AycA", | |
456 "psch", "AycAY", | |
457 0, "AycAY", // null means finishKeyboardTransliteration | |
458 }; | |
459 | |
460 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); | |
461 delete t; | |
462 } | |
463 | |
464 /** | |
465 * Basic test of keyboard with cursor. | |
466 */ | |
467 void TransliteratorTest::TestKeyboard2(void) { | |
468 UParseError parseError; | |
469 UErrorCode status = U_ZERO_ERROR; | |
470 Transliterator *t = Transliterator::createFromRules("<ID>", | |
471 UnicodeString("ych>Y;") | |
472 +"ps>|y;" | |
473 +"ch>x;" | |
474 +"a>A;", | |
475 UTRANS_FORWARD, parseError, | |
476 status); | |
477 if (U_FAILURE(status)) { | |
478 errln("FAIL: RBT constructor failed"); | |
479 return; | |
480 } | |
481 const char* DATA[] = { | |
482 // insertion, buffer | |
483 "a", "A", | |
484 "p", "Ap", | |
485 "s", "Aps", // modified for rollback - "Ay", | |
486 "c", "Apsc", // modified for rollback - "Ayc", | |
487 "a", "AycA", | |
488 "p", "AycAp", | |
489 "s", "AycAps", // modified for rollback - "AycAy", | |
490 "c", "AycApsc", // modified for rollback - "AycAyc", | |
491 "h", "AycAY", | |
492 0, "AycAY", // null means finishKeyboardTransliteration | |
493 }; | |
494 | |
495 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); | |
496 delete t; | |
497 } | |
498 | |
499 /** | |
500 * Test keyboard transliteration with back-replacement. | |
501 */ | |
502 void TransliteratorTest::TestKeyboard3(void) { | |
503 // We want th>z but t>y. Furthermore, during keyboard | |
504 // transliteration we want t>y then yh>z if t, then h are | |
505 // typed. | |
506 UnicodeString RULES("t>|y;" | |
507 "yh>z;"); | |
508 | |
509 const char* DATA[] = { | |
510 // Column 1: characters to add to buffer (as if typed) | |
511 // Column 2: expected appearance of buffer after | |
512 // keyboard xliteration. | |
513 "a", "a", | |
514 "b", "ab", | |
515 "t", "abt", // modified for rollback - "aby", | |
516 "c", "abyc", | |
517 "t", "abyct", // modified for rollback - "abycy", | |
518 "h", "abycz", | |
519 0, "abycz", // null means finishKeyboardTransliteration | |
520 }; | |
521 | |
522 UParseError parseError; | |
523 UErrorCode status = U_ZERO_ERROR; | |
524 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FO
RWARD, parseError, status); | |
525 if (U_FAILURE(status)) { | |
526 errln("FAIL: RBT constructor failed"); | |
527 return; | |
528 } | |
529 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); | |
530 delete t; | |
531 } | |
532 | |
533 void TransliteratorTest::keyboardAux(const Transliterator& t, | |
534 const char* DATA[], int32_t DATA_length) { | |
535 UErrorCode status = U_ZERO_ERROR; | |
536 UTransPosition index={0, 0, 0, 0}; | |
537 UnicodeString s; | |
538 for (int32_t i=0; i<DATA_length; i+=2) { | |
539 UnicodeString log; | |
540 if (DATA[i] != 0) { | |
541 log = s + " + " | |
542 + DATA[i] | |
543 + " -> "; | |
544 t.transliterate(s, index, DATA[i], status); | |
545 } else { | |
546 log = s + " => "; | |
547 t.finishTransliteration(s, index); | |
548 } | |
549 // Show the start index '{' and the cursor '|' | |
550 UnicodeString a, b, c; | |
551 s.extractBetween(0, index.contextStart, a); | |
552 s.extractBetween(index.contextStart, index.start, b); | |
553 s.extractBetween(index.start, s.length(), c); | |
554 log.append(a). | |
555 append((UChar)LEFT_BRACE). | |
556 append(b). | |
557 append((UChar)PIPE). | |
558 append(c); | |
559 if (s == DATA[i+1] && U_SUCCESS(status)) { | |
560 logln(log); | |
561 } else { | |
562 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]); | |
563 } | |
564 } | |
565 } | |
566 | |
567 void TransliteratorTest::TestArabic(void) { | |
568 // Test disabled for 2.0 until new Arabic transliterator can be written. | |
569 // /* | |
570 // const char* DATA[] = { | |
571 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+ | |
572 // "\u0627\u0644\u0644\u063a\u0629\u0020"+ | |
573 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+ | |
574 // "\u0628\u0628\u0646\u0638\u0645\u0020"+ | |
575 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+ | |
576 // "\u062c\u0645\u064a\u0644\u0629", | |
577 // }; | |
578 // */ | |
579 // | |
580 // UChar ar_raw[] = { | |
581 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627, | |
582 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644, | |
583 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, | |
584 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643, | |
585 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, | |
586 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0 | |
587 // }; | |
588 // UnicodeString ar(ar_raw); | |
589 // UErrorCode status=U_ZERO_ERROR; | |
590 // UParseError parseError; | |
591 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_
FORWARD, parseError, status); | |
592 // if (t == 0) { | |
593 // errln("FAIL: createInstance failed"); | |
594 // return; | |
595 // } | |
596 // expect(*t, "Arabic", ar); | |
597 // delete t; | |
598 } | |
599 | |
600 /** | |
601 * Compose the Kana transliterator forward and reverse and try | |
602 * some strings that should come out unchanged. | |
603 */ | |
604 void TransliteratorTest::TestCompoundKana(void) { | |
605 UParseError parseError; | |
606 UErrorCode status = U_ZERO_ERROR; | |
607 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-
Latin", UTRANS_FORWARD, parseError, status); | |
608 if (t == 0) { | |
609 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed -
%s", u_errorName(status)); | |
610 } else { | |
611 expect(*t, "aaaaa", "aaaaa"); | |
612 delete t; | |
613 } | |
614 } | |
615 | |
616 /** | |
617 * Compose the hex transliterators forward and reverse. | |
618 */ | |
619 void TransliteratorTest::TestCompoundHex(void) { | |
620 UParseError parseError; | |
621 UErrorCode status = U_ZERO_ERROR; | |
622 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError, status); | |
623 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError, status); | |
624 Transliterator* transab[] = { a, b }; | |
625 Transliterator* transba[] = { b, a }; | |
626 if (a == 0 || b == 0) { | |
627 errln("FAIL: construction failed"); | |
628 delete a; | |
629 delete b; | |
630 return; | |
631 } | |
632 // Do some basic tests of a | |
633 expect(*a, "01", UnicodeString("\\u0030\\u0031", "")); | |
634 // Do some basic tests of b | |
635 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01"); | |
636 | |
637 Transliterator* ab = new CompoundTransliterator(transab, 2); | |
638 UnicodeString s("abcde", ""); | |
639 expect(*ab, s, s); | |
640 | |
641 UnicodeString str(s); | |
642 a->transliterate(str); | |
643 Transliterator* ba = new CompoundTransliterator(transba, 2); | |
644 expect(*ba, str, str); | |
645 | |
646 delete ab; | |
647 delete ba; | |
648 delete a; | |
649 delete b; | |
650 } | |
651 | |
652 int gTestFilterClassID = 0; | |
653 /** | |
654 * Used by TestFiltering(). | |
655 */ | |
656 class TestFilter : public UnicodeFilter { | |
657 virtual UnicodeFunctor* clone() const { | |
658 return new TestFilter(*this); | |
659 } | |
660 virtual UBool contains(UChar32 c) const { | |
661 return c != (UChar)0x0063 /*c*/; | |
662 } | |
663 // Stubs | |
664 virtual UnicodeString& toPattern(UnicodeString& result, | |
665 UBool /*escapeUnprintable*/) const { | |
666 return result; | |
667 } | |
668 virtual UBool matchesIndexValue(uint8_t /*v*/) const { | |
669 return FALSE; | |
670 } | |
671 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {} | |
672 public: | |
673 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; } | |
674 }; | |
675 | |
676 /** | |
677 * Do some basic tests of filtering. | |
678 */ | |
679 void TransliteratorTest::TestFiltering(void) { | |
680 UParseError parseError; | |
681 UErrorCode status = U_ZERO_ERROR; | |
682 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWA
RD, parseError, status); | |
683 if (hex == 0) { | |
684 errln("FAIL: createInstance(Any-Hex) failed"); | |
685 return; | |
686 } | |
687 hex->adoptFilter(new TestFilter()); | |
688 UnicodeString s("abcde"); | |
689 hex->transliterate(s); | |
690 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", ""); | |
691 if (s == exp) { | |
692 logln(UnicodeString("Ok: \"") + exp + "\""); | |
693 } else { | |
694 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\""); | |
695 } | |
696 | |
697 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J. | |
698 UnicodeFilter *f = hex->orphanFilter(); | |
699 if (f == NULL){ | |
700 errln("FAIL: orphanFilter() should get a UnicodeFilter"); | |
701 } else { | |
702 delete f; | |
703 } | |
704 delete hex; | |
705 } | |
706 | |
707 /** | |
708 * Test anchors | |
709 */ | |
710 void TransliteratorTest::TestAnchors(void) { | |
711 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""), | |
712 "aaa", | |
713 "012"); | |
714 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""), | |
715 "aaa", | |
716 "012"); | |
717 expect(UnicodeString("^ab > 01 ;" | |
718 " ab > |8 ;" | |
719 " b > k ;" | |
720 " 8x$ > 45 ;" | |
721 " 8x > 77 ;", ""), | |
722 | |
723 "ababbabxabx", | |
724 "018k7745"); | |
725 expect(UnicodeString("$s = [z$] ;" | |
726 "$s{ab > 01 ;" | |
727 " ab > |8 ;" | |
728 " b > k ;" | |
729 " 8x}$s > 45 ;" | |
730 " 8x > 77 ;", ""), | |
731 | |
732 "abzababbabxzabxabx", | |
733 "01z018k45z01x45"); | |
734 } | |
735 | |
736 /** | |
737 * Test pattern quoting and escape mechanisms. | |
738 */ | |
739 void TransliteratorTest::TestPatternQuoting(void) { | |
740 // Array of 3n items | |
741 // Each item is <rules>, <input>, <expected output> | |
742 const UnicodeString DATA[] = { | |
743 UnicodeString(UChar(0x4E01)) + ">'[male adult]'", | |
744 UnicodeString(UChar(0x4E01)), | |
745 "[male adult]" | |
746 }; | |
747 | |
748 for (int32_t i=0; i<3; i+=3) { | |
749 logln(UnicodeString("Pattern: ") + prettify(DATA[i])); | |
750 UParseError parseError; | |
751 UErrorCode status = U_ZERO_ERROR; | |
752 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTR
ANS_FORWARD, parseError, status); | |
753 if (U_FAILURE(status)) { | |
754 errln("RBT constructor failed"); | |
755 } else { | |
756 expect(*t, DATA[i+1], DATA[i+2]); | |
757 } | |
758 delete t; | |
759 } | |
760 } | |
761 | |
762 /** | |
763 * Regression test for bugs found in Greek transliteration. | |
764 */ | |
765 void TransliteratorTest::TestJ277(void) { | |
766 UErrorCode status = U_ZERO_ERROR; | |
767 UParseError parseError; | |
768 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]
Remove; NFC", UTRANS_FORWARD, parseError, status); | |
769 if (gl == NULL) { | |
770 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_erro
rName(status)); | |
771 return; | |
772 } | |
773 | |
774 UChar sigma = 0x3C3; | |
775 UChar upsilon = 0x3C5; | |
776 UChar nu = 0x3BD; | |
777 // UChar PHI = 0x3A6; | |
778 UChar alpha = 0x3B1; | |
779 // UChar omega = 0x3C9; | |
780 // UChar omicron = 0x3BF; | |
781 // UChar epsilon = 0x3B5; | |
782 | |
783 // sigma upsilon nu -> syn | |
784 UnicodeString syn; | |
785 syn.append(sigma).append(upsilon).append(nu); | |
786 expect(*gl, syn, "syn"); | |
787 | |
788 // sigma alpha upsilon nu -> saun | |
789 UnicodeString sayn; | |
790 sayn.append(sigma).append(alpha).append(upsilon).append(nu); | |
791 expect(*gl, sayn, "saun"); | |
792 | |
793 // Again, using a smaller rule set | |
794 UnicodeString rules( | |
795 "$alpha = \\u03B1;" | |
796 "$nu = \\u03BD;" | |
797 "$sigma = \\u03C3;" | |
798 "$ypsilon = \\u03C5;" | |
799 "$vowel = [aeiouAEIOU$alpha$ypsilon];" | |
800 "s <> $sigma;" | |
801 "a <> $alpha;" | |
802 "u <> $vowel { $ypsilon;" | |
803 "y <> $ypsilon;" | |
804 "n <> $nu;", | |
805 ""); | |
806 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS
_REVERSE, parseError, status); | |
807 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); r
eturn; } | |
808 expect(*mini, syn, "syn"); | |
809 expect(*mini, sayn, "saun"); | |
810 delete mini; | |
811 mini = NULL; | |
812 | |
813 #if !UCONFIG_NO_FORMATTING | |
814 // Transliterate the Greek locale data | |
815 Locale el("el"); | |
816 DateFormatSymbols syms(el, status); | |
817 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); r
eturn; } | |
818 int32_t i, count; | |
819 const UnicodeString* data = syms.getMonths(count); | |
820 for (i=0; i<count; ++i) { | |
821 if (data[i].length() == 0) { | |
822 continue; | |
823 } | |
824 UnicodeString out(data[i]); | |
825 gl->transliterate(out); | |
826 UBool ok = TRUE; | |
827 if (data[i].length() >= 2 && out.length() >= 2 && | |
828 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { | |
829 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { | |
830 ok = FALSE; | |
831 } | |
832 } | |
833 if (ok) { | |
834 logln(prettify(data[i] + " -> " + out)); | |
835 } else { | |
836 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); | |
837 } | |
838 } | |
839 #endif | |
840 | |
841 delete gl; | |
842 } | |
843 | |
844 /** | |
845 * Prefix, suffix support in hex transliterators | |
846 */ | |
847 void TransliteratorTest::TestJ243(void) { | |
848 UErrorCode ec = U_ZERO_ERROR; | |
849 | |
850 // Test default Hex-Any, which should handle | |
851 // \u, \U, u+, and U+ | |
852 Transliterator *hex = | |
853 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec); | |
854 if (assertSuccess("getInstance", ec)) { | |
855 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A
+B,CuDz"); | |
856 } | |
857 delete hex; | |
858 | |
859 // // Try a custom Hex-Unicode | |
860 // // \uXXXX and &#xXXXX; | |
861 // ec = U_ZERO_ERROR; | |
862 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""
), ec); | |
863 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x01&
#x0032;3", ""), | |
864 // "abcd5fx0123"); | |
865 // // Try custom Any-Hex (default is tested elsewhere) | |
866 // ec = U_ZERO_ERROR; | |
867 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec); | |
868 // expect(hex3, "012", "012"); | |
869 } | |
870 | |
871 /** | |
872 * Parsers need better syntax error messages. | |
873 */ | |
874 void TransliteratorTest::TestJ329(void) { | |
875 | |
876 struct { UBool containsErrors; const char* rule; } DATA[] = { | |
877 { FALSE, "a > b; c > d" }, | |
878 { TRUE, "a > b; no operator; c > d" }, | |
879 }; | |
880 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); | |
881 | |
882 for (int32_t i=0; i<DATA_length; ++i) { | |
883 UErrorCode status = U_ZERO_ERROR; | |
884 UParseError parseError; | |
885 Transliterator *rbt = Transliterator::createFromRules("<ID>", | |
886 DATA[i].rule, | |
887 UTRANS_FORWARD, | |
888 parseError, | |
889 status); | |
890 UBool gotError = U_FAILURE(status); | |
891 UnicodeString desc(DATA[i].rule); | |
892 desc.append(gotError ? " -> error" : " -> no error"); | |
893 if (gotError) { | |
894 desc = desc + ", ParseError code=" + u_errorName(status) + | |
895 " line=" + parseError.line + | |
896 " offset=" + parseError.offset + | |
897 " context=" + parseError.preContext; | |
898 } | |
899 if (gotError == DATA[i].containsErrors) { | |
900 logln(UnicodeString("Ok: ") + desc); | |
901 } else { | |
902 errln(UnicodeString("FAIL: ") + desc); | |
903 } | |
904 delete rbt; | |
905 } | |
906 } | |
907 | |
908 /** | |
909 * Test segments and segment references. | |
910 */ | |
911 void TransliteratorTest::TestSegments(void) { | |
912 // Array of 3n items | |
913 // Each item is <rules>, <input>, <expected output> | |
914 UnicodeString DATA[] = { | |
915 "([a-z]) '.' ([0-9]) > $2 '-' $1", | |
916 "abc.123.xyz.456", | |
917 "ab1-c23.xy4-z56", | |
918 | |
919 // nested | |
920 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;", | |
921 "a1 b2", | |
922 "a1.a.1 b2.b.2", | |
923 }; | |
924 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); | |
925 | |
926 for (int32_t i=0; i<DATA_length; i+=3) { | |
927 logln("Pattern: " + prettify(DATA[i])); | |
928 UParseError parseError; | |
929 UErrorCode status = U_ZERO_ERROR; | |
930 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRAN
S_FORWARD, parseError, status); | |
931 if (U_FAILURE(status)) { | |
932 errln("FAIL: RBT constructor"); | |
933 } else { | |
934 expect(*t, DATA[i+1], DATA[i+2]); | |
935 } | |
936 delete t; | |
937 } | |
938 } | |
939 | |
940 /** | |
941 * Test cursor positioning outside of the key | |
942 */ | |
943 void TransliteratorTest::TestCursorOffset(void) { | |
944 // Array of 3n items | |
945 // Each item is <rules>, <input>, <expected output> | |
946 UnicodeString DATA[] = { | |
947 "pre {alpha} post > | @ ALPHA ;" | |
948 "eALPHA > beta ;" | |
949 "pre {beta} post > BETA @@ | ;" | |
950 "post > xyz", | |
951 | |
952 "prealphapost prebetapost", | |
953 | |
954 "prbetaxyz preBETApost", | |
955 }; | |
956 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); | |
957 | |
958 for (int32_t i=0; i<DATA_length; i+=3) { | |
959 logln("Pattern: " + prettify(DATA[i])); | |
960 UParseError parseError; | |
961 UErrorCode status = U_ZERO_ERROR; | |
962 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTR
ANS_FORWARD, parseError, status); | |
963 if (U_FAILURE(status)) { | |
964 errln("FAIL: RBT constructor"); | |
965 } else { | |
966 expect(*t, DATA[i+1], DATA[i+2]); | |
967 } | |
968 delete t; | |
969 } | |
970 } | |
971 | |
972 /** | |
973 * Test zero length and > 1 char length variable values. Test | |
974 * use of variable refs in UnicodeSets. | |
975 */ | |
976 void TransliteratorTest::TestArbitraryVariableValues(void) { | |
977 // Array of 3n items | |
978 // Each item is <rules>, <input>, <expected output> | |
979 UnicodeString DATA[] = { | |
980 "$abe = ab;" | |
981 "$pat = x[yY]z;" | |
982 "$ll = 'a-z';" | |
983 "$llZ = [$ll];" | |
984 "$llY = [$ll$pat];" | |
985 "$emp = ;" | |
986 | |
987 "$abe > ABE;" | |
988 "$pat > END;" | |
989 "$llZ > 1;" | |
990 "$llY > 2;" | |
991 "7$emp 8 > 9;" | |
992 "", | |
993 | |
994 "ab xYzxyz stY78", | |
995 "ABE ENDEND 1129", | |
996 }; | |
997 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); | |
998 | |
999 for (int32_t i=0; i<DATA_length; i+=3) { | |
1000 logln("Pattern: " + prettify(DATA[i])); | |
1001 UParseError parseError; | |
1002 UErrorCode status = U_ZERO_ERROR; | |
1003 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTR
ANS_FORWARD, parseError, status); | |
1004 if (U_FAILURE(status)) { | |
1005 errln("FAIL: RBT constructor"); | |
1006 } else { | |
1007 expect(*t, DATA[i+1], DATA[i+2]); | |
1008 } | |
1009 delete t; | |
1010 } | |
1011 } | |
1012 | |
1013 /** | |
1014 * Confirm that the contextStart, contextLimit, start, and limit | |
1015 * behave correctly. J474. | |
1016 */ | |
1017 void TransliteratorTest::TestPositionHandling(void) { | |
1018 // Array of 3n items | |
1019 // Each item is <rules>, <input>, <expected output> | |
1020 const char* DATA[] = { | |
1021 "a{t} > SS ; {t}b > UU ; {t} > TT ;", | |
1022 "xtat txtb", // pos 0,9,0,9 | |
1023 "xTTaSS TTxUUb", | |
1024 | |
1025 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", | |
1026 "xtat txtb", // pos 2,9,3,8 | |
1027 "xtaSS TTxUUb", | |
1028 | |
1029 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", | |
1030 "xtat txtb", // pos 3,8,3,8 | |
1031 "xtaTT TTxTTb", | |
1032 }; | |
1033 | |
1034 // Array of 4n positions -- these go with the DATA array | |
1035 // They are: contextStart, contextLimit, start, limit | |
1036 int32_t POS[] = { | |
1037 0, 9, 0, 9, | |
1038 2, 9, 3, 8, | |
1039 3, 8, 3, 8, | |
1040 }; | |
1041 | |
1042 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3; | |
1043 for (int32_t i=0; i<n; i++) { | |
1044 UErrorCode status = U_ZERO_ERROR; | |
1045 UParseError parseError; | |
1046 Transliterator *t = Transliterator::createFromRules("<ID>", | |
1047 DATA[3*i], UTRANS_FORWARD, parseError, status); | |
1048 if (U_FAILURE(status)) { | |
1049 delete t; | |
1050 errln("FAIL: RBT constructor"); | |
1051 return; | |
1052 } | |
1053 UTransPosition pos; | |
1054 pos.contextStart= POS[4*i]; | |
1055 pos.contextLimit = POS[4*i+1]; | |
1056 pos.start = POS[4*i+2]; | |
1057 pos.limit = POS[4*i+3]; | |
1058 UnicodeString rsource(DATA[3*i+1]); | |
1059 t->transliterate(rsource, pos, status); | |
1060 if (U_FAILURE(status)) { | |
1061 delete t; | |
1062 errln("FAIL: transliterate"); | |
1063 return; | |
1064 } | |
1065 t->finishTransliteration(rsource, pos); | |
1066 expectAux(DATA[3*i], | |
1067 DATA[3*i+1], | |
1068 rsource, | |
1069 DATA[3*i+2]); | |
1070 delete t; | |
1071 } | |
1072 } | |
1073 | |
1074 /** | |
1075 * Test the Hiragana-Katakana transliterator. | |
1076 */ | |
1077 void TransliteratorTest::TestHiraganaKatakana(void) { | |
1078 UParseError parseError; | |
1079 UErrorCode status = U_ZERO_ERROR; | |
1080 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTR
ANS_FORWARD, parseError, status); | |
1081 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTR
ANS_FORWARD, parseError, status); | |
1082 if (hk == 0 || kh == 0) { | |
1083 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); | |
1084 delete hk; | |
1085 delete kh; | |
1086 return; | |
1087 } | |
1088 | |
1089 // Array of 3n items | |
1090 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> | |
1091 const char* DATA[] = { | |
1092 "both", | |
1093 "\\u3042\\u3090\\u3099\\u3092\\u3050", | |
1094 "\\u30A2\\u30F8\\u30F2\\u30B0", | |
1095 | |
1096 "kh", | |
1097 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC", | |
1098 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC", | |
1099 }; | |
1100 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); | |
1101 | |
1102 for (int32_t i=0; i<DATA_length; i+=3) { | |
1103 UnicodeString h = CharsToUnicodeString(DATA[i+1]); | |
1104 UnicodeString k = CharsToUnicodeString(DATA[i+2]); | |
1105 switch (*DATA[i]) { | |
1106 case 0x68: //'h': // Hiragana-Katakana | |
1107 expect(*hk, h, k); | |
1108 break; | |
1109 case 0x6B: //'k': // Katakana-Hiragana | |
1110 expect(*kh, k, h); | |
1111 break; | |
1112 case 0x62: //'b': // both | |
1113 expect(*hk, h, k); | |
1114 expect(*kh, k, h); | |
1115 break; | |
1116 } | |
1117 } | |
1118 delete hk; | |
1119 delete kh; | |
1120 } | |
1121 | |
1122 /** | |
1123 * Test cloning / copy constructor of RBT. | |
1124 */ | |
1125 void TransliteratorTest::TestCopyJ476(void) { | |
1126 // The real test here is what happens when the destructors are | |
1127 // called. So we let one object get destructed, and check to | |
1128 // see that its copy still works. | |
1129 Transliterator *t2 = 0; | |
1130 { | |
1131 UParseError parseError; | |
1132 UErrorCode status = U_ZERO_ERROR; | |
1133 Transliterator *t1 = Transliterator::createFromRules("t1", | |
1134 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status); | |
1135 if (U_FAILURE(status)) { | |
1136 errln("FAIL: RBT constructor"); | |
1137 return; | |
1138 } | |
1139 t2 = t1->clone(); // Call copy constructor under the covers. | |
1140 expect(*t1, "abcfoofoo", "ABcbar"); | |
1141 delete t1; | |
1142 } | |
1143 expect(*t2, "abcfoofoo", "ABcbar"); | |
1144 delete t2; | |
1145 } | |
1146 | |
1147 /** | |
1148 * Test inter-Indic transliterators. These are composed. | |
1149 * ICU4C Jitterbug 483. | |
1150 */ | |
1151 void TransliteratorTest::TestInterIndic(void) { | |
1152 UnicodeString ID("Devanagari-Gujarati", ""); | |
1153 UErrorCode status = U_ZERO_ERROR; | |
1154 UParseError parseError; | |
1155 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, pars
eError, status); | |
1156 if (dg == 0) { | |
1157 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorN
ame(status)); | |
1158 return; | |
1159 } | |
1160 UnicodeString id = dg->getID(); | |
1161 if (id != ID) { | |
1162 errln("FAIL: createInstance(" + ID + ")->getID() => " + id); | |
1163 } | |
1164 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925"); | |
1165 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5"); | |
1166 expect(*dg, dev, guj); | |
1167 delete dg; | |
1168 } | |
1169 | |
1170 /** | |
1171 * Test filter syntax in IDs. (J918) | |
1172 */ | |
1173 void TransliteratorTest::TestFilterIDs(void) { | |
1174 // Array of 3n strings: | |
1175 // <id>, <inverse id>, <input>, <expected output> | |
1176 const char* DATA[] = { | |
1177 "[aeiou]Any-Hex", // ID | |
1178 "[aeiou]Hex-Any", // expected inverse ID | |
1179 "quizzical", // src | |
1180 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) | |
1181 | |
1182 "[aeiou]Any-Hex;[^5]Hex-Any", | |
1183 "[^5]Any-Hex;[aeiou]Hex-Any", | |
1184 "quizzical", | |
1185 "q\\u0075izzical", | |
1186 | |
1187 "[abc]Null", | |
1188 "[abc]Null", | |
1189 "xyz", | |
1190 "xyz", | |
1191 }; | |
1192 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) }; | |
1193 | |
1194 for (int i=0; i<DATA_length; i+=4) { | |
1195 UnicodeString ID(DATA[i], ""); | |
1196 UnicodeString uID(DATA[i+1], ""); | |
1197 UnicodeString data2(DATA[i+2], ""); | |
1198 UnicodeString data3(DATA[i+3], ""); | |
1199 UParseError parseError; | |
1200 UErrorCode status = U_ZERO_ERROR; | |
1201 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, p
arseError, status); | |
1202 if (t == 0) { | |
1203 errln("FAIL: createInstance(" + ID + ") returned NULL"); | |
1204 return; | |
1205 } | |
1206 expect(*t, data2, data3); | |
1207 | |
1208 // Check the ID | |
1209 if (ID != t->getID()) { | |
1210 errln("FAIL: createInstance(" + ID + ").getID() => " + | |
1211 t->getID()); | |
1212 } | |
1213 | |
1214 // Check the inverse | |
1215 Transliterator *u = t->createInverse(status); | |
1216 if (u == 0) { | |
1217 errln("FAIL: " + ID + ".createInverse() returned NULL"); | |
1218 } else if (u->getID() != uID) { | |
1219 errln("FAIL: " + ID + ".createInverse().getID() => " + | |
1220 u->getID() + ", expected " + uID); | |
1221 } | |
1222 | |
1223 delete t; | |
1224 delete u; | |
1225 } | |
1226 } | |
1227 | |
1228 /** | |
1229 * Test the case mapping transliterators. | |
1230 */ | |
1231 void TransliteratorTest::TestCaseMap(void) { | |
1232 UParseError parseError; | |
1233 UErrorCode status = U_ZERO_ERROR; | |
1234 Transliterator* toUpper = | |
1235 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, par
seError, status); | |
1236 Transliterator* toLower = | |
1237 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, par
seError, status); | |
1238 Transliterator* toTitle = | |
1239 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, par
seError, status); | |
1240 if (toUpper==0 || toLower==0 || toTitle==0) { | |
1241 errln("FAIL: createInstance returned NULL"); | |
1242 delete toUpper; | |
1243 delete toLower; | |
1244 delete toTitle; | |
1245 return; | |
1246 } | |
1247 | |
1248 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.", | |
1249 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); | |
1250 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", | |
1251 "the quick brown foX jumped over the lazY dogs."); | |
1252 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.", | |
1253 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); | |
1254 | |
1255 delete toUpper; | |
1256 delete toLower; | |
1257 delete toTitle; | |
1258 } | |
1259 | |
1260 /** | |
1261 * Test the name mapping transliterators: Any-Name (with an inline filter), | |
* Name-Any, and the compound "Any-Name;Name-Any" round trip.
1262 */ | |
1263 void TransliteratorTest::TestNameMap(void) { | |
1264 UParseError parseError; | |
1265 UErrorCode status = U_ZERO_ERROR; | |
// Both directions are created up front; the same status is passed to both calls.
1266 Transliterator* uni2name = | |
1267 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseEr
ror, status); | |
1268 Transliterator* name2uni = | |
1269 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, s
tatus); | |
// If either instance is missing, report the failure, free whatever was created, and stop.
1270 if (uni2name==0 || name2uni==0) { | |
1271 errln("FAIL: createInstance returned NULL"); | |
1272 delete uni2name; | |
1273 delete name2uni; | |
1274 return; | |
1275 } | |
1276 | |
1277 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N | |
// Character -> name: the expected output keeps "abc" as-is and spells every
// other input character as a \N{...} name.
1278 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFF
FD\\u0004\\u0009\\u0081\\uFFFF"), | |
1279 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOG
RAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHA
RACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<non
character-FFFF>}")); | |
// Name -> character: the input mixes well-formed \N{...} names (some with extra
// spaces) with the fragments "\N{x" and a trailing "\N{"; the expected output
// keeps those fragments as literal text.
1280 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK
UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{
REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\
\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"), | |
1281 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\
\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); | |
1282 | |
1283 delete uni2name; | |
1284 delete name2uni; | |
1285 | |
1286 // round trip | |
// The compound ID chains both directions, so the text is expected to come back unchanged.
1287 Transliterator* t = | |
1288 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, pars
eError, status); | |
1289 if (t==0) { | |
1290 errln("FAIL: createInstance returned NULL"); | |
// t is null on this path, so the delete below has no effect.
1291 delete t; | |
1292 return; | |
1293 } | |
1294 | |
1295 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N | |
1296 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0
A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"); | |
// The same string serves as both input and expected output.
1297 expect(*t, s, s); | |
1298 delete t; | |
1299 } | |
1300 | |
1301 /** | |
1302 * Test liberalized ID syntax. 1006c | |
1303 */ | |
1304 void TransliteratorTest::TestLiberalizedID(void) { | |
1305 // Some test cases have an expected getID() value of NULL. This | |
1306 // means I have disabled the test case for now. This stuff is | |
1307 // still under development, and I haven't decided whether to make | |
1308 // getID() return canonical case yet. It will all get rewritten | |
1309 // with the move to Source-Target/Variant IDs anyway. [aliu] | |
1310 const char* DATA[] = { | |
1311 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity", | |
1312 " Null ", "Null", "whitespace", | |
1313 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", | |
1314 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whites
pace", | |
1315 }; | |
1316 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); | |
1317 UParseError parseError; | |
1318 UErrorCode status= U_ZERO_ERROR; | |
1319 for (int32_t i=0; i<DATA_length; i+=3) { | |
1320 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWA
RD, parseError, status); | |
1321 if (t == 0) { | |
1322 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] + | |
1323 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(statu
s)); | |
1324 } else { | |
1325 UnicodeString exp; | |
1326 if (DATA[i+1]) { | |
1327 exp = UnicodeString(DATA[i+1], ""); | |
1328 } | |
1329 // Don't worry about getID() if the expected char* | |
1330 // is NULL -- see above. | |
1331 if (exp.length() == 0 || exp == t->getID()) { | |
1332 logln(UnicodeString("Ok: ") + DATA[i+2] + | |
1333 " create ID \"" + DATA[i] + "\" => \"" + | |
1334 exp + "\""); | |
1335 } else { | |
1336 errln(UnicodeString("FAIL: ") + DATA[i+2] + | |
1337 " create ID \"" + DATA[i] + "\" => \"" + | |
1338 t->getID() + "\", exp \"" + exp + "\""); | |
1339 } | |
1340 delete t; | |
1341 } | |
1342 } | |
1343 } | |
1344 | |
1345 /* test for Jitterbug 912 */ | |
1346 void TransliteratorTest::TestCreateInstance(){ | |
1347 const char* FORWARD = "F"; | |
1348 const char* REVERSE = "R"; | |
1349 const char* DATA[] = { | |
1350 // Column 1: id | |
1351 // Column 2: direction | |
1352 // Column 3: expected ID, or "" if expect failure | |
1353 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 | |
1354 | |
1355 // JB#2689: bad compound causes crash | |
1356 "InvalidSource-InvalidTarget", FORWARD, "", | |
1357 "InvalidSource-InvalidTarget", REVERSE, "", | |
1358 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", | |
1359 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", | |
1360 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", | |
1361 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", | |
1362 | |
1363 NULL | |
1364 }; | |
1365 | |
1366 for (int32_t i=0; DATA[i]; i+=3) { | |
1367 UParseError err; | |
1368 UErrorCode ec = U_ZERO_ERROR; | |
1369 UnicodeString id(DATA[i]); | |
1370 UTransDirection dir = (DATA[i+1]==FORWARD)? | |
1371 UTRANS_FORWARD:UTRANS_REVERSE; | |
1372 UnicodeString expID(DATA[i+2]); | |
1373 Transliterator* t = | |
1374 Transliterator::createInstance(id,dir,err,ec); | |
1375 UnicodeString newID; | |
1376 if (t) { | |
1377 newID = t->getID(); | |
1378 } | |
1379 UBool ok = (newID == expID); | |
1380 if (!t) { | |
1381 newID = u_errorName(ec); | |
1382 } | |
1383 if (ok) { | |
1384 logln((UnicodeString)"Ok: createInstance(" + | |
1385 id + "," + DATA[i+1] + ") => " + newID); | |
1386 } else { | |
1387 dataerrln((UnicodeString)"FAIL: createInstance(" + | |
1388 id + "," + DATA[i+1] + ") => " + newID + | |
1389 ", expected " + expID); | |
1390 } | |
1391 delete t; | |
1392 } | |
1393 } | |
1394 | |
1395 /** | |
1396 * Test the normalization transliterator. | |
1397 */ | |
1398 void TransliteratorTest::TestNormalizationTransliterator() { | |
1399 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTes
t | |
1400 // PLEASE KEEP THEM IN SYNC WITH BasicTest. | |
1401 const char* CANON[] = { | |
1402 // Input Decomposed Composed | |
1403 "cat", "cat", "cat" , | |
1404 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" , | |
1405 | |
1406 "\\u1e0a", "D\\u0307", "\\u1e0a" , //
D-dot_above | |
1407 "D\\u0307", "D\\u0307", "\\u1e0a" , //
D dot_above | |
1408 | |
1409 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" ,
// D-dot_below dot_above | |
1410 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" ,
// D-dot_above dot_below | |
1411 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" ,
// D dot_below dot_above | |
1412 | |
1413 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307
", // D dot_below cedilla dot_above | |
1414 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307
", // D dot_above ogonek dot_below | |
1415 | |
1416 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , //
E-macron-grave | |
1417 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , /
/ E-macron + grave | |
1418 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" ,
// E-grave + macron | |
1419 | |
1420 "\\u212b", "A\\u030a", "\\u00c5" , //
angstrom_sign | |
1421 "\\u00c5", "A\\u030a", "\\u00c5" , //
A-ring | |
1422 | |
1423 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" ,
//updated with 3.0 | |
1424 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" ,
//updated with 3.0 | |
1425 | |
1426 "Henry IV", "Henry IV", "Henry IV" , | |
1427 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" , | |
1428 | |
1429 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , //
ga (Katakana) | |
1430 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , /
/ ka + ten | |
1431 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" ,
// hw_ka + hw_ten | |
1432 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" ,
// ka + hw_ten | |
1433 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" ,
// hw_ka + ten | |
1434 | |
1435 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" , | |
1436 0 // end | |
1437 }; | |
1438 | |
1439 const char* COMPAT[] = { | |
1440 // Input Decomposed Composed | |
1441 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , //
Alef-Lamed vs. Alef, Lamed | |
1442 | |
1443 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" ,
//updated for 3.0 | |
1444 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , //
ffi ligature -> f + f + i | |
1445 | |
1446 "Henry IV", "Henry IV", "Henry IV" , | |
1447 "Henry \\u2163", "Henry IV", "Henry IV" , | |
1448 | |
1449 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , //
ga (Katakana) | |
1450 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , /
/ ka + ten | |
1451 | |
1452 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , /
/ hw_ka + ten | |
1453 0 // end | |
1454 }; | |
1455 | |
1456 int32_t i; | |
1457 UParseError parseError; | |
1458 UErrorCode status = U_ZERO_ERROR; | |
1459 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD,
parseError, status); | |
1460 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD,
parseError, status); | |
1461 if (!NFD || !NFC) { | |
1462 dataerrln("FAIL: createInstance failed: %s", u_errorName(status)); | |
1463 delete NFD; | |
1464 delete NFC; | |
1465 return; | |
1466 } | |
1467 for (i=0; CANON[i]; i+=3) { | |
1468 UnicodeString in = CharsToUnicodeString(CANON[i]); | |
1469 UnicodeString expd = CharsToUnicodeString(CANON[i+1]); | |
1470 UnicodeString expc = CharsToUnicodeString(CANON[i+2]); | |
1471 expect(*NFD, in, expd); | |
1472 expect(*NFC, in, expc); | |
1473 } | |
1474 delete NFD; | |
1475 delete NFC; | |
1476 | |
1477 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError, status); | |
1478 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError, status); | |
1479 if (!NFKD || !NFKC) { | |
1480 dataerrln("FAIL: createInstance failed"); | |
1481 delete NFKD; | |
1482 delete NFKC; | |
1483 return; | |
1484 } | |
1485 for (i=0; COMPAT[i]; i+=3) { | |
1486 UnicodeString in = CharsToUnicodeString(COMPAT[i]); | |
1487 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]); | |
1488 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]); | |
1489 expect(*NFKD, in, expkd); | |
1490 expect(*NFKC, in, expkc); | |
1491 } | |
1492 delete NFKD; | |
1493 delete NFKC; | |
1494 | |
1495 UParseError pe; | |
1496 status = U_ZERO_ERROR; | |
1497 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove", | |
1498 UTRANS_FORWARD, | |
1499 pe, status); | |
1500 if (t == 0) { | |
1501 errln("FAIL: createInstance failed"); | |
1502 } | |
1503 expect(*t, CharsToUnicodeString("\\u010dx"), | |
1504 CharsToUnicodeString("c\\u030C")); | |
1505 delete t; | |
1506 } | |
1507 | |
1508 /** | |
1509 * Test compound RBT rules. | |
1510 */ | |
1511 void TransliteratorTest::TestCompoundRBT(void) { | |
1512 // Careful with spacing and ';' here: Phrase this exactly | |
1513 // as toRules() is going to return it. If toRules() changes | |
1514 // with regard to spacing or ';', then adjust this string. | |
1515 UnicodeString rule("::Hex-Any;\n" | |
1516 "::Any-Lower;\n" | |
1517 "a > '.A.';\n" | |
1518 "b > '.B.';\n" | |
1519 "::[^t]Any-Upper;", ""); | |
1520 UParseError parseError; | |
1521 UErrorCode status = U_ZERO_ERROR; | |
1522 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FOR
WARD, parseError, status); | |
1523 if (t == 0) { | |
1524 errln("FAIL: createFromRules failed"); | |
1525 return; | |
1526 } | |
1527 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"), | |
1528 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); | |
1529 UnicodeString r; | |
1530 t->toRules(r, TRUE); | |
1531 if (r == rule) { | |
1532 logln((UnicodeString)"OK: toRules() => " + r); | |
1533 } else { | |
1534 errln((UnicodeString)"FAIL: toRules() => " + r + | |
1535 ", expected " + rule); | |
1536 } | |
1537 delete t; | |
1538 | |
1539 // Now test toRules | |
1540 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FOR
WARD, parseError, status); | |
1541 if (t == 0) { | |
1542 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); | |
1543 return; | |
1544 } | |
1545 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;"); | |
1546 t->toRules(r, TRUE); | |
1547 if (r != exp) { | |
1548 errln((UnicodeString)"FAIL: toRules() => " + r + | |
1549 ", expected " + exp); | |
1550 } else { | |
1551 logln((UnicodeString)"OK: toRules() => " + r); | |
1552 } | |
1553 delete t; | |
1554 | |
1555 // Round trip the result of toRules | |
1556 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, s
tatus); | |
1557 if (t == 0) { | |
1558 errln("FAIL: createFromRules #2 failed"); | |
1559 return; | |
1560 } else { | |
1561 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded"); | |
1562 } | |
1563 | |
1564 // Test toRules again | |
1565 t->toRules(r, TRUE); | |
1566 if (r != exp) { | |
1567 errln((UnicodeString)"FAIL: toRules() => " + r + | |
1568 ", expected " + exp); | |
1569 } else { | |
1570 logln((UnicodeString)"OK: toRules() => " + r); | |
1571 } | |
1572 | |
1573 delete t; | |
1574 | |
1575 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform | |
1576 // to what the regenerated ID will look like. | |
1577 UnicodeString id("Upper(Lower);(NFKC)", ""); | |
1578 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status); | |
1579 if (t == 0) { | |
1580 errln("FAIL: createInstance #2 failed"); | |
1581 return; | |
1582 } | |
1583 if (t->getID() == id) { | |
1584 logln((UnicodeString)"OK: created " + id); | |
1585 } else { | |
1586 errln((UnicodeString)"FAIL: createInstance(" + id + | |
1587 ").getID() => " + t->getID()); | |
1588 } | |
1589 | |
1590 Transliterator *u = t->createInverse(status); | |
1591 if (u == 0) { | |
1592 errln("FAIL: createInverse failed"); | |
1593 delete t; | |
1594 return; | |
1595 } | |
1596 exp = "NFKC();Lower(Upper)"; | |
1597 if (u->getID() == exp) { | |
1598 logln((UnicodeString)"OK: createInverse(" + id + ") => " + | |
1599 u->getID()); | |
1600 } else { | |
1601 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " + | |
1602 u->getID()); | |
1603 } | |
1604 delete t; | |
1605 delete u; | |
1606 } | |
1607 | |
1608 /** | |
1609 * Compound filter semantics were orginially not implemented | |
1610 * correctly. Originally, each component filter f(i) is replaced by | |
1611 * f'(i) = f(i) && g, where g is the filter for the compound | |
1612 * transliterator. | |
1613 * | |
1614 * From Mark: | |
1615 * | |
1616 * Suppose and I have a transliterator X. Internally X is | |
1617 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. | |
1618 * | |
1619 * The compound should convert all greek characters (through latin) to | |
1620 * cyrillic, then lowercase the result. The filter should say "don't | |
1621 * touch 'A' in the original". But because an intermediate result | |
1622 * happens to go through "A", the Greek Alpha gets hung up. | |
1623 */ | |
1624 void TransliteratorTest::TestCompoundFilter(void) { | |
1625 UParseError parseError; | |
1626 UErrorCode status = U_ZERO_ERROR; | |
1627 Transliterator *t = Transliterator::createInstance | |
1628 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status); | |
1629 if (t == 0) { | |
1630 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); | |
1631 return; | |
1632 } | |
1633 t->adoptFilter(new UnicodeSet("[^A]", status)); | |
1634 if (U_FAILURE(status)) { | |
1635 errln("FAIL: UnicodeSet ct failed"); | |
1636 delete t; | |
1637 return; | |
1638 } | |
1639 | |
1640 // Only the 'A' at index 1 should remain unchanged | |
1641 expect(*t, | |
1642 CharsToUnicodeString("BA\\u039A\\u0391"), | |
1643 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); | |
1644 delete t; | |
1645 } | |
1646 | |
1647 void TransliteratorTest::TestRemove(void) { | |
1648 UParseError parseError; | |
1649 UErrorCode status = U_ZERO_ERROR; | |
1650 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FOR
WARD, parseError, status); | |
1651 if (t == 0) { | |
1652 errln("FAIL: createInstance failed"); | |
1653 return; | |
1654 } | |
1655 | |
1656 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts"); | |
1657 | |
1658 // extra test for RemoveTransliterator::clone(), which at one point wasn't | |
1659 // duplicating the filter | |
1660 Transliterator* t2 = t->clone(); | |
1661 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts"); | |
1662 | |
1663 delete t; | |
1664 delete t2; | |
1665 } | |
1666 | |
1667 void TransliteratorTest::TestToRules(void) { | |
1668 const char* RBT = "rbt"; | |
1669 const char* SET = "set"; | |
1670 static const char* DATA[] = { | |
1671 RBT, | |
1672 "$a=\\u4E61; [$a] > A;", | |
1673 "[\\u4E61] > A;", | |
1674 | |
1675 RBT, | |
1676 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", | |
1677 "[[:Zs:][:Zl:]]{a} > A;", | |
1678 | |
1679 SET, | |
1680 "[[:Zs:][:Zl:]]", | |
1681 "[[:Zs:][:Zl:]]", | |
1682 | |
1683 SET, | |
1684 "[:Ps:]", | |
1685 "[:Ps:]", | |
1686 | |
1687 SET, | |
1688 "[:L:]", | |
1689 "[:L:]", | |
1690 | |
1691 SET, | |
1692 "[[:L:]-[A]]", | |
1693 "[[:L:]-[A]]", | |
1694 | |
1695 SET, | |
1696 "[~[:Lu:][:Ll:]]", | |
1697 "[~[:Lu:][:Ll:]]", | |
1698 | |
1699 SET, | |
1700 "[~[a-z]]", | |
1701 "[~[a-z]]", | |
1702 | |
1703 RBT, | |
1704 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", | |
1705 "[^[:Zs:]]{a} > A;", | |
1706 | |
1707 RBT, | |
1708 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", | |
1709 "[[a-z]-[:Zs:]]{a} > A;", | |
1710 | |
1711 RBT, | |
1712 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", | |
1713 "[[:Zs:]&[a-z]]{a} > A;", | |
1714 | |
1715 RBT, | |
1716 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", | |
1717 "[x[:Zs:]]{a} > A;", | |
1718 | |
1719 RBT, | |
1720 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;" | |
1721 "$macron = \\u0304 ;" | |
1722 "$evowel = [aeiouyAEIOUY] ;" | |
1723 "$iotasub = \\u0345 ;" | |
1724 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", | |
1725 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u
0345;", | |
1726 | |
1727 RBT, | |
1728 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", | |
1729 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", | |
1730 }; | |
1731 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]))
; | |
1732 | |
1733 for (int32_t d=0; d < DATA_length; d+=3) { | |
1734 if (DATA[d] == RBT) { | |
1735 // Transliterator test | |
1736 UParseError parseError; | |
1737 UErrorCode status = U_ZERO_ERROR; | |
1738 Transliterator *t = Transliterator::createFromRules("ID", | |
1739 UnicodeString(DA
TA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status); | |
1740 if (t == 0) { | |
1741 dataerrln("FAIL: createFromRules failed - %s", u_errorName(statu
s)); | |
1742 return; | |
1743 } | |
1744 UnicodeString rules, escapedRules; | |
1745 t->toRules(rules, FALSE); | |
1746 t->toRules(escapedRules, TRUE); | |
1747 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]); | |
1748 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV); | |
1749 if (rules == expRules) { | |
1750 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_IN
V) + | |
1751 " => " + rules); | |
1752 } else { | |
1753 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_
INV) + | |
1754 " => " + rules + ", exp " + expRules); | |
1755 } | |
1756 if (escapedRules == expEscapedRules) { | |
1757 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_IN
V) + | |
1758 " => " + escapedRules); | |
1759 } else { | |
1760 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_
INV) + | |
1761 " => " + escapedRules + ", exp " + expEscapedRules); | |
1762 } | |
1763 delete t; | |
1764 | |
1765 } else { | |
1766 // UnicodeSet test | |
1767 UErrorCode status = U_ZERO_ERROR; | |
1768 UnicodeString pat(DATA[d+1], -1, US_INV); | |
1769 UnicodeString expToPat(DATA[d+2], -1, US_INV); | |
1770 UnicodeSet set(pat, status); | |
1771 if (U_FAILURE(status)) { | |
1772 errln("FAIL: UnicodeSet ct failed"); | |
1773 return; | |
1774 } | |
1775 // Adjust spacing etc. as necessary. | |
1776 UnicodeString toPat; | |
1777 set.toPattern(toPat); | |
1778 if (expToPat == toPat) { | |
1779 logln((UnicodeString)"Ok: " + pat + | |
1780 " => " + toPat); | |
1781 } else { | |
1782 errln((UnicodeString)"FAIL: " + pat + | |
1783 " => " + prettify(toPat, TRUE) + | |
1784 ", exp " + prettify(pat, TRUE)); | |
1785 } | |
1786 } | |
1787 } | |
1788 } | |
1789 | |
1790 void TransliteratorTest::TestContext() { | |
1791 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l | |
1792 expect("de > x; {d}e > y;", | |
1793 "de", | |
1794 "ye", | |
1795 &pos); | |
1796 | |
1797 expect("ab{c} > z;", | |
1798 "xadabdabcy", | |
1799 "xadabdabzy"); | |
1800 } | |
1801 | |
1802 void TransliteratorTest::TestSupplemental() { | |
1803 | |
1804 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" | |
1805 "a > $a; $s > i;"), | |
1806 CharsToUnicodeString("ab\\U0001030Fx"), | |
1807 CharsToUnicodeString("\\U00010300bix")); | |
1808 | |
1809 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" | |
1810 "$b=[A-Z\\U00010400-\\U0001044D];" | |
1811 "($a)($b) > $2 $1;"), | |
1812 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301
D"), | |
1813 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U0001030
1")); | |
1814 | |
1815 // k|ax\\U00010300xm | |
1816 | |
1817 // k|a\\U00010400\\U00010300xm | |
1818 // ky|\\U00010400\\U00010300xm | |
1819 // ky\\U00010400|\\U00010300xm | |
1820 | |
1821 // ky\\U00010400|\\U00010300\\U00010400m | |
1822 // ky\\U00010400y|\\U00010400m | |
1823 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" | |
1824 "$a {x} > | @ \\U00010400;" | |
1825 "{$a} [^\\u0000-\\uFFFF] > y;"), | |
1826 CharsToUnicodeString("kax\\U00010300xm"), | |
1827 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); | |
1828 | |
1829 expectT("Any-Name", | |
1830 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), | |
1831 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LET
TER A}\\N{NO-BREAK SPACE}")); | |
1832 | |
1833 expectT("Any-Hex/Unicode", | |
1834 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1835 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0")); | |
1836 | |
1837 expectT("Any-Hex/C", | |
1838 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1839 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0")); | |
1840 | |
1841 expectT("Any-Hex/Perl", | |
1842 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1843 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}")); | |
1844 | |
1845 expectT("Any-Hex/Java", | |
1846 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1847 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u0
0A0")); | |
1848 | |
1849 expectT("Any-Hex/XML", | |
1850 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1851 "𐌰􏼀󠁡 "); | |
1852 | |
1853 expectT("Any-Hex/XML10", | |
1854 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1855 "𐌰􏼀󠁡 "); | |
1856 | |
1857 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"), | |
1858 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), | |
1859 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); | |
1860 } | |
1861 | |
1862 void TransliteratorTest::TestQuantifier() { | |
1863 | |
1864 // Make sure @ in a quantified anteContext works | |
1865 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", | |
1866 "AAAAAb", | |
1867 "aaa(aac)"); | |
1868 | |
1869 // Make sure @ in a quantified postContext works | |
1870 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", | |
1871 "baaaaa", | |
1872 "caa(aaa)"); | |
1873 | |
1874 // Make sure @ in a quantified postContext with seg ref works | |
1875 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", | |
1876 "baaaaa", | |
1877 "baa(aaa)"); | |
1878 | |
1879 // Make sure @ past ante context doesn't enter ante context | |
1880 UTransPosition pos = {0, 5, 3, 5}; | |
1881 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", | |
1882 "xxxab", | |
1883 "xxx(ac)", | |
1884 &pos); | |
1885 | |
1886 // Make sure @ past post context doesn't pass limit | |
1887 UTransPosition pos2 = {0, 4, 0, 2}; | |
1888 expect("{b} a+ > c @@ |; x > y; a > A;", | |
1889 "baxx", | |
1890 "caxx", | |
1891 &pos2); | |
1892 | |
1893 // Make sure @ past post context doesn't enter post context | |
1894 expect("{b} a+ > c @@ |; x > y; a > A;", | |
1895 "baxx", | |
1896 "cayy"); | |
1897 | |
1898 expect("(ab)? c > d;", | |
1899 "c abc ababc", | |
1900 "d d abd"); | |
1901 | |
1902 // NOTE: The (ab)+ when referenced just yields a single "ab", | |
1903 // not the full sequence of them. This accords with perl behavior. | |
1904 expect("(ab)+ {x} > '(' $1 ')';", | |
1905 "x abx ababxy", | |
1906 "x ab(ab) abab(ab)y"); | |
1907 | |
1908 expect("b+ > x;", | |
1909 "ac abc abbc abbbc", | |
1910 "ac axc axc axc"); | |
1911 | |
1912 expect("[abc]+ > x;", | |
1913 "qac abrc abbcs abtbbc", | |
1914 "qx xrx xs xtx"); | |
1915 | |
1916 expect("q{(ab)+} > x;", | |
1917 "qa qab qaba qababc qaba", | |
1918 "qa qx qxa qxc qxa"); | |
1919 | |
1920 expect("q(ab)* > x;", | |
1921 "qa qab qaba qababc", | |
1922 "xa x xa xc"); | |
1923 | |
1924 // NOTE: The (ab)+ when referenced just yields a single "ab", | |
1925 // not the full sequence of them. This accords with perl behavior. | |
1926 expect("q(ab)* > '(' $1 ')';", | |
1927 "qa qab qaba qababc", | |
1928 "()a (ab) (ab)a (ab)c"); | |
1929 | |
1930 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire | |
1931 // quoted string | |
1932 expect("'ab'+ > x;", | |
1933 "bb ab ababb", | |
1934 "bb x xb"); | |
1935 | |
1936 // $foo+ and $foo* -- the quantifier should apply to the entire | |
1937 // variable reference | |
1938 expect("$var = ab; $var+ > x;", | |
1939 "bb ab ababb", | |
1940 "bb x xb"); | |
1941 } | |
1942 | |
1943 class TestTrans : public Transliterator { | |
1944 public: | |
1945 TestTrans(const UnicodeString& id) : Transliterator(id, 0) { | |
1946 } | |
1947 virtual Transliterator* clone(void) const { | |
1948 return new TestTrans(getID()); | |
1949 } | |
1950 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offs
ets, | |
1951 UBool /*isIncremental*/) const | |
1952 { | |
1953 offsets.start = offsets.limit; | |
1954 } | |
1955 virtual UClassID getDynamicClassID() const; | |
1956 static UClassID U_EXPORT2 getStaticClassID(); | |
1957 }; | |
1958 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans) | |
1959 | |
1960 /** | |
1961 * Test Source-Target/Variant. | |
1962 */ | |
1963 void TransliteratorTest::TestSTV(void) { | |
1964 int32_t ns = Transliterator::countAvailableSources(); | |
1965 if (ns < 0 || ns > 255) { | |
1966 errln((UnicodeString)"FAIL: Bad source count: " + ns); | |
1967 return; | |
1968 } | |
1969 int32_t i, j; | |
1970 for (i=0; i<ns; ++i) { | |
1971 UnicodeString source; | |
1972 Transliterator::getAvailableSource(i, source); | |
1973 logln((UnicodeString)"" + i + ": " + source); | |
1974 if (source.length() == 0) { | |
1975 errln("FAIL: empty source"); | |
1976 continue; | |
1977 } | |
1978 int32_t nt = Transliterator::countAvailableTargets(source); | |
1979 if (nt < 0 || nt > 255) { | |
1980 errln((UnicodeString)"FAIL: Bad target count: " + nt); | |
1981 continue; | |
1982 } | |
1983 for (int32_t j=0; j<nt; ++j) { | |
1984 UnicodeString target; | |
1985 Transliterator::getAvailableTarget(j, source, target); | |
1986 logln((UnicodeString)" " + j + ": " + target); | |
1987 if (target.length() == 0) { | |
1988 errln("FAIL: empty target"); | |
1989 continue; | |
1990 } | |
1991 int32_t nv = Transliterator::countAvailableVariants(source, target); | |
1992 if (nv < 0 || nv > 255) { | |
1993 errln((UnicodeString)"FAIL: Bad variant count: " + nv); | |
1994 continue; | |
1995 } | |
1996 for (int32_t k=0; k<nv; ++k) { | |
1997 UnicodeString variant; | |
1998 Transliterator::getAvailableVariant(k, source, target, variant); | |
1999 if (variant.length() == 0) { | |
2000 logln((UnicodeString)" " + k + ": <empty>"); | |
2001 } else { | |
2002 logln((UnicodeString)" " + k + ": " + variant); | |
2003 } | |
2004 } | |
2005 } | |
2006 } | |
2007 | |
2008 // Test registration | |
2009 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; | |
2010 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/
Vsie" }; | |
2011 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" }; | |
2012 for (i=0; i<3; ++i) { | |
2013 Transliterator *t = new TestTrans(IDS[i]); | |
2014 if (t == 0) { | |
2015 errln("FAIL: out of memory"); | |
2016 return; | |
2017 } | |
2018 if (t->getID() != IDS[i]) { | |
2019 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]); | |
2020 delete t; | |
2021 return; | |
2022 } | |
2023 Transliterator::registerInstance(t); | |
2024 UErrorCode status = U_ZERO_ERROR; | |
2025 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status); | |
2026 if (t == NULL) { | |
2027 errln((UnicodeString)"FAIL: Registration/creation failed for ID " + | |
2028 IDS[i]); | |
2029 } else { | |
2030 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " + | |
2031 IDS[i]); | |
2032 delete t; | |
2033 } | |
2034 Transliterator::unregister(IDS[i]); | |
2035 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status); | |
2036 if (t != NULL) { | |
2037 errln((UnicodeString)"FAIL: Unregistration failed for ID " + | |
2038 IDS[i]); | |
2039 delete t; | |
2040 } | |
2041 } | |
2042 | |
2043 // Make sure getAvailable API reflects removal | |
2044 int32_t n = Transliterator::countAvailableIDs(); | |
2045 for (i=0; i<n; ++i) { | |
2046 UnicodeString id = Transliterator::getAvailableID(i); | |
2047 for (j=0; j<3; ++j) { | |
2048 if (id.caseCompare(FULL_IDS[j],0)==0) { | |
2049 errln((UnicodeString)"FAIL: unregister(" + id + ") failed"); | |
2050 } | |
2051 } | |
2052 } | |
2053 n = Transliterator::countAvailableTargets("Any"); | |
2054 for (i=0; i<n; ++i) { | |
2055 UnicodeString t; | |
2056 Transliterator::getAvailableTarget(i, "Any", t); | |
2057 if (t.caseCompare(IDS[0],0)==0) { | |
2058 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed"); | |
2059 } | |
2060 } | |
2061 n = Transliterator::countAvailableSources(); | |
2062 for (i=0; i<n; ++i) { | |
2063 UnicodeString s; | |
2064 Transliterator::getAvailableSource(i, s); | |
2065 for (j=0; j<3; ++j) { | |
2066 if (SOURCES[j] == NULL) continue; | |
2067 if (s.caseCompare(SOURCES[j],0)==0) { | |
2068 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed"); | |
2069 } | |
2070 } | |
2071 } | |
2072 } | |
2073 | |
2074 /** | |
2075 * Test inverse of Greek-Latin; Title() | |
2076 */ | |
2077 void TransliteratorTest::TestCompoundInverse(void) { | |
2078 UParseError parseError; | |
2079 UErrorCode status = U_ZERO_ERROR; | |
2080 Transliterator *t = Transliterator::createInstance | |
2081 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status); | |
2082 if (t == 0) { | |
2083 dataerrln("FAIL: createInstance - %s", u_errorName(status)); | |
2084 return; | |
2085 } | |
2086 UnicodeString exp("(Title);Latin-Greek"); | |
2087 if (t->getID() == exp) { | |
2088 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + | |
2089 t->getID()); | |
2090 } else { | |
2091 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + | |
2092 t->getID() + "\", expected \"" + exp + "\""); | |
2093 } | |
2094 delete t; | |
2095 } | |
2096 | |
2097 /** | |
2098 * Test NFD chaining with RBT | |
2099 */ | |
2100 void TransliteratorTest::TestNFDChainRBT() { | |
2101 UParseError pe; | |
2102 UErrorCode ec = U_ZERO_ERROR; | |
2103 Transliterator* t = Transliterator::createFromRules( | |
2104 "TEST", "::NFD; aa > Q; a > q;", | |
2105 UTRANS_FORWARD, pe, ec); | |
2106 if (t == NULL || U_FAILURE(ec)) { | |
2107 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_erro
rName(ec)); | |
2108 return; | |
2109 } | |
2110 expect(*t, "aa", "Q"); | |
2111 delete t; | |
2112 | |
2113 // TEMPORARY TESTS -- BEING DEBUGGED | |
2114 //=- UnicodeString s, s2; | |
2115 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, p
e, ec); | |
2116 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); | |
2117 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u
0937\\u0947\\u0924\\u094D"); | |
2118 //=- expect(*t, s, s2); | |
2119 //=- delete t; | |
2120 //=- | |
2121 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, p
e, ec); | |
2122 //=- expect(*t, s2, s); | |
2123 //=- delete t; | |
2124 //=- | |
2125 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin",
UTRANS_FORWARD, pe, ec); | |
2126 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); | |
2127 //=- expect(*t, s, s); | |
2128 //=- delete t; | |
2129 | |
2130 // const char* source[] = { | |
2131 // /* | |
2132 // "\\u015Br\\u012Bmad", | |
2133 // "bhagavadg\\u012Bt\\u0101", | |
2134 // "adhy\\u0101ya", | |
2135 // "arjuna", | |
2136 // "vi\\u1E63\\u0101da", | |
2137 // "y\\u014Dga", | |
2138 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", | |
2139 // "uv\\u0101cr\\u0325", | |
2140 // */ | |
2141 // "rmk\\u1E63\\u0113t", | |
2142 // //"dharmak\\u1E63\\u0113tr\\u0113", | |
2143 // /* | |
2144 // "kuruk\\u1E63\\u0113tr\\u0113", | |
2145 // "samav\\u0113t\\u0101", | |
2146 // "yuyutsava-\\u1E25", | |
2147 // "m\\u0101mak\\u0101-\\u1E25", | |
2148 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", | |
2149 // "kimakurvata", | |
2150 // "san\\u0304java", | |
2151 // */ | |
2152 // | |
2153 // 0 | |
2154 // }; | |
2155 // const char* expected[] = { | |
2156 // /* | |
2157 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", | |
2158 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", | |
2159 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", | |
2160 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", | |
2161 // "\\u0935\\u093f\\u0937\\u093e\\u0926", | |
2162 // "\\u092f\\u094b\\u0917", | |
2163 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u093
0", | |
2164 // "\\u0909\\u0935\\u093E\\u091A\\u0943", | |
2165 // */ | |
2166 // "\\u0927", | |
2167 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u0
94d\\u0930\\u0947", | |
2168 // /* | |
2169 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094
d\\u0930\\u0947", | |
2170 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", | |
2171 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", | |
2172 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", | |
2173 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091
a\\u0948\\u0935", | |
2174 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", | |
2175 // "\\u0938\\u0902\\u091c\\u0935", | |
2176 // */ | |
2177 // 0 | |
2178 // }; | |
2179 // UErrorCode status = U_ZERO_ERROR; | |
2180 // UParseError parseError; | |
2181 // UnicodeString message; | |
2182 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-De
vanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); | |
2183 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari
-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); | |
2184 // if(U_FAILURE(status)){ | |
2185 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorNam
e(status)); | |
2186 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext:
" + prettify( parseError.postContext) ); | |
2187 // delete latinToDevToLatin; | |
2188 // delete devToLatinToDev; | |
2189 // return; | |
2190 // } | |
2191 // UnicodeString gotResult; | |
2192 // for(int i= 0; source[i] != 0; i++){ | |
2193 // gotResult = source[i]; | |
2194 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnico
deString(source[i])); | |
2195 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnico
deString(expected[i])); | |
2196 // } | |
2197 // delete latinToDevToLatin; | |
2198 // delete devToLatinToDev; | |
2199 } | |
2200 | |
2201 /** | |
2202 * Inverse of "Null" should be "Null". (J21) | |
2203 */ | |
2204 void TransliteratorTest::TestNullInverse() { | |
2205 UParseError pe; | |
2206 UErrorCode ec = U_ZERO_ERROR; | |
2207 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, p
e, ec); | |
2208 if (t == 0 || U_FAILURE(ec)) { | |
2209 errln("FAIL: createInstance"); | |
2210 return; | |
2211 } | |
2212 Transliterator *u = t->createInverse(ec); | |
2213 if (u == 0 || U_FAILURE(ec)) { | |
2214 errln("FAIL: createInverse"); | |
2215 delete t; | |
2216 return; | |
2217 } | |
2218 if (u->getID() != "Null") { | |
2219 errln("FAIL: Inverse of Null should be Null"); | |
2220 } | |
2221 delete t; | |
2222 delete u; | |
2223 } | |
2224 | |
2225 /** | |
2226 * Check ID of inverse of alias. (J22) | |
2227 */ | |
2228 void TransliteratorTest::TestAliasInverseID() { | |
2229 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an
inverse | |
2230 UParseError pe; | |
2231 UErrorCode ec = U_ZERO_ERROR; | |
2232 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, e
c); | |
2233 if (t == 0 || U_FAILURE(ec)) { | |
2234 dataerrln("FAIL: createInstance - %s", u_errorName(ec)); | |
2235 return; | |
2236 } | |
2237 Transliterator *u = t->createInverse(ec); | |
2238 if (u == 0 || U_FAILURE(ec)) { | |
2239 errln("FAIL: createInverse"); | |
2240 delete t; | |
2241 return; | |
2242 } | |
2243 UnicodeString exp = "Hangul-Latin"; | |
2244 UnicodeString got = u->getID(); | |
2245 if (got != exp) { | |
2246 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got + | |
2247 ", expected " + exp); | |
2248 } | |
2249 delete t; | |
2250 delete u; | |
2251 } | |
2252 | |
2253 /** | |
2254 * Test IDs of inverses of compound transliterators. (J20) | |
2255 */ | |
2256 void TransliteratorTest::TestCompoundInverseID() { | |
2257 UnicodeString ID = "Latin-Jamo;NFC(NFD)"; | |
2258 UParseError pe; | |
2259 UErrorCode ec = U_ZERO_ERROR; | |
2260 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, e
c); | |
2261 if (t == 0 || U_FAILURE(ec)) { | |
2262 dataerrln("FAIL: createInstance - %s", u_errorName(ec)); | |
2263 return; | |
2264 } | |
2265 Transliterator *u = t->createInverse(ec); | |
2266 if (u == 0 || U_FAILURE(ec)) { | |
2267 errln("FAIL: createInverse"); | |
2268 delete t; | |
2269 return; | |
2270 } | |
2271 UnicodeString exp = "NFD(NFC);Jamo-Latin"; | |
2272 UnicodeString got = u->getID(); | |
2273 if (got != exp) { | |
2274 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got + | |
2275 ", expected " + exp); | |
2276 } | |
2277 delete t; | |
2278 delete u; | |
2279 } | |
2280 | |
2281 /** | |
2282 * Test undefined variable. | |
2283 | |
2284 */ | |
2285 void TransliteratorTest::TestUndefinedVariable() { | |
2286 UnicodeString rule = "$initial } a <> \\u1161;"; | |
2287 UParseError pe; | |
2288 UErrorCode ec = U_ZERO_ERROR; | |
2289 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FOR
WARD, pe, ec); | |
2290 delete t; | |
2291 if (U_FAILURE(ec)) { | |
2292 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: "
+ | |
2293 u_errorName(ec)); | |
2294 return; | |
2295 } | |
2296 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " + | |
2297 u_errorName(ec)); | |
2298 } | |
2299 | |
2300 /** | |
2301 * Test empty context. | |
2302 */ | |
2303 void TransliteratorTest::TestEmptyContext() { | |
2304 expect(" { a } > b;", "xay a ", "xby b "); | |
2305 } | |
2306 | |
2307 /** | |
2308 * Test compound filter ID syntax | |
2309 */ | |
2310 void TransliteratorTest::TestCompoundFilterID(void) { | |
2311 static const char* DATA[] = { | |
2312 // Col. 1 = ID or rule set (latter must start with #) | |
2313 | |
2314 // = columns > 1 are null if expect col. 1 to be illegal = | |
2315 | |
2316 // Col. 2 = direction, "F..." or "R..." | |
2317 // Col. 3 = source string | |
2318 // Col. 4 = exp result | |
2319 | |
2320 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters | |
2321 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter | |
2322 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c", | |
2323 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\
\u0393", "\\u0391b\\u0393", | |
2324 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c", | |
2325 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\
\u0391\\u0392\\u0393", "\\u0391b\\u0393", | |
2326 NULL, | |
2327 }; | |
2328 | |
2329 for (int32_t i=0; DATA[i]; i+=4) { | |
2330 UnicodeString id = CharsToUnicodeString(DATA[i]); | |
2331 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ? | |
2332 UTRANS_REVERSE : UTRANS_FORWARD; | |
2333 UnicodeString source; | |
2334 UnicodeString exp; | |
2335 if (DATA[i+2] != NULL) { | |
2336 source = CharsToUnicodeString(DATA[i+2]); | |
2337 exp = CharsToUnicodeString(DATA[i+3]); | |
2338 } | |
2339 UBool expOk = (DATA[i+1] != NULL); | |
2340 Transliterator* t = NULL; | |
2341 UParseError pe; | |
2342 UErrorCode ec = U_ZERO_ERROR; | |
2343 if (id.charAt(0) == 0x23/*#*/) { | |
2344 t = Transliterator::createFromRules("ID", id, direction, pe, ec); | |
2345 } else { | |
2346 t = Transliterator::createInstance(id, direction, pe, ec); | |
2347 } | |
2348 UBool ok = (t != NULL && U_SUCCESS(ec)); | |
2349 UnicodeString transID; | |
2350 if (t!=0) { | |
2351 transID = t->getID(); | |
2352 } | |
2353 else { | |
2354 transID = UnicodeString("NULL", ""); | |
2355 } | |
2356 if (ok == expOk) { | |
2357 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " + | |
2358 u_errorName(ec)); | |
2359 if (source.length() != 0) { | |
2360 expect(*t, source, exp); | |
2361 } | |
2362 delete t; | |
2363 } else { | |
2364 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " + | |
2365 u_errorName(ec)); | |
2366 } | |
2367 } | |
2368 } | |
2369 | |
2370 /** | |
2371 * Test new property set syntax | |
2372 */ | |
2373 void TransliteratorTest::TestPropertySet() { | |
2374 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyx
xx"); | |
2375 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", | |
2376 "[ a stitch ]\n[ in time ]\r[ saves 9]"); | |
2377 } | |
2378 | |
2379 /** | |
2380 * Test various failure points of the new 2.0 engine. | |
2381 */ | |
2382 void TransliteratorTest::TestNewEngine() { | |
2383 UParseError pe; | |
2384 UErrorCode ec = U_ZERO_ERROR; | |
2385 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_
FORWARD, pe, ec); | |
2386 if (t == 0 || U_FAILURE(ec)) { | |
2387 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec)); | |
2388 return; | |
2389 } | |
2390 // Katakana should be untouched | |
2391 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"), | |
2392 CharsToUnicodeString("\\u3042\\u3042\\u30A2")); | |
2393 | |
2394 delete t; | |
2395 | |
2396 #if 1 | |
2397 // This test will only work if Transliterator.ROLLBACK is | |
2398 // true. Otherwise, this test will fail, revealing a | |
2399 // limitation of global filters in incremental mode. | |
2400 Transliterator *a = | |
2401 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe,
ec); | |
2402 Transliterator *A = | |
2403 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe,
ec); | |
2404 if (U_FAILURE(ec)) { | |
2405 delete a; | |
2406 delete A; | |
2407 return; | |
2408 } | |
2409 | |
2410 Transliterator* array[3]; | |
2411 array[0] = a; | |
2412 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec); | |
2413 array[2] = A; | |
2414 if (U_FAILURE(ec)) { | |
2415 errln("FAIL: createInstance NFD"); | |
2416 delete a; | |
2417 delete A; | |
2418 delete array[1]; | |
2419 return; | |
2420 } | |
2421 | |
2422 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec)); | |
2423 if (U_FAILURE(ec)) { | |
2424 errln("FAIL: UnicodeSet constructor"); | |
2425 delete a; | |
2426 delete A; | |
2427 delete array[1]; | |
2428 delete t; | |
2429 return; | |
2430 } | |
2431 | |
2432 expect(*t, "aAaA", "bAbA"); | |
2433 | |
2434 assertTrue("countElements", t->countElements() == 3); | |
2435 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A"); | |
2436 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD"); | |
2437 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b"); | |
2438 assertSuccess("getElement", ec); | |
2439 | |
2440 delete a; | |
2441 delete A; | |
2442 delete array[1]; | |
2443 delete t; | |
2444 #endif | |
2445 | |
2446 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aei
ouyAEIOUY$smooth$macron] > | $1 $smooth ;", | |
2447 "a", | |
2448 "ax"); | |
2449 | |
2450 UnicodeString gr = CharsToUnicodeString( | |
2451 "$ddot = \\u0308 ;" | |
2452 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;" | |
2453 "$rough = \\u0314 ;" | |
2454 "($lcgvowel+ $ddot?) $rough > h | $1 ;" | |
2455 "\\u03b1 <> a ;" | |
2456 "$rough <> h ;"); | |
2457 | |
2458 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha"); | |
2459 } | |
2460 | |
2461 /** | |
2462 * Test quantified segment behavior. We want: | |
2463 * ([abc])+ > x $1 x; applied to "cba" produces "xax" | |
2464 */ | |
2465 void TransliteratorTest::TestQuantifiedSegment(void) { | |
2466 // The normal case | |
2467 expect("([abc]+) > x $1 x;", "cba", "xcbax"); | |
2468 | |
2469 // The tricky case; the quantifier is around the segment | |
2470 expect("([abc])+ > x $1 x;", "cba", "xax"); | |
2471 | |
2472 // Tricky case in reverse direction | |
2473 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); | |
2474 | |
2475 // Check post-context segment | |
2476 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); | |
2477 | |
2478 // Test toRule/toPattern for non-quantified segment. | |
2479 // Careful with spacing here. | |
2480 UnicodeString r("([a-c]){q} > x $1 x;"); | |
2481 UParseError pe; | |
2482 UErrorCode ec = U_ZERO_ERROR; | |
2483 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD,
pe, ec); | |
2484 if (U_FAILURE(ec)) { | |
2485 errln("FAIL: createFromRules"); | |
2486 delete t; | |
2487 return; | |
2488 } | |
2489 UnicodeString rr; | |
2490 t->toRules(rr, TRUE); | |
2491 if (r != rr) { | |
2492 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""
); | |
2493 } else { | |
2494 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); | |
2495 } | |
2496 delete t; | |
2497 | |
2498 // Test toRule/toPattern for quantified segment. | |
2499 // Careful with spacing here. | |
2500 r = "([a-c])+{q} > x $1 x;"; | |
2501 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); | |
2502 if (U_FAILURE(ec)) { | |
2503 errln("FAIL: createFromRules"); | |
2504 delete t; | |
2505 return; | |
2506 } | |
2507 t->toRules(rr, TRUE); | |
2508 if (r != rr) { | |
2509 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""
); | |
2510 } else { | |
2511 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); | |
2512 } | |
2513 delete t; | |
2514 } | |
2515 | |
2516 //====================================================================== | |
2517 // Ram's tests | |
2518 //====================================================================== | |
2519 void TransliteratorTest::TestDevanagariLatinRT(){ | |
2520 const int MAX_LEN= 52; | |
2521 const char* const source[MAX_LEN] = { | |
2522 "bh\\u0101rata", | |
2523 "kra", | |
2524 "k\\u1E63a", | |
2525 "khra", | |
2526 "gra", | |
2527 "\\u1E45ra", | |
2528 "cra", | |
2529 "chra", | |
2530 "j\\u00F1a", | |
2531 "jhra", | |
2532 "\\u00F1ra", | |
2533 "\\u1E6Dya", | |
2534 "\\u1E6Dhra", | |
2535 "\\u1E0Dya", | |
2536 //"r\\u0323ya", // \u095c is not valid in Devanagari | |
2537 "\\u1E0Dhya", | |
2538 "\\u1E5Bhra", | |
2539 "\\u1E47ra", | |
2540 "tta", | |
2541 "thra", | |
2542 "dda", | |
2543 "dhra", | |
2544 "nna", | |
2545 "pra", | |
2546 "phra", | |
2547 "bra", | |
2548 "bhra", | |
2549 "mra", | |
2550 "\\u1E49ra", | |
2551 //"l\\u0331ra", | |
2552 "yra", | |
2553 "\\u1E8Fra", | |
2554 //"l-", | |
2555 "vra", | |
2556 "\\u015Bra", | |
2557 "\\u1E63ra", | |
2558 "sra", | |
2559 "hma", | |
2560 "\\u1E6D\\u1E6Da", | |
2561 "\\u1E6D\\u1E6Dha", | |
2562 "\\u1E6Dh\\u1E6Dha", | |
2563 "\\u1E0D\\u1E0Da", | |
2564 "\\u1E0D\\u1E0Dha", | |
2565 "\\u1E6Dya", | |
2566 "\\u1E6Dhya", | |
2567 "\\u1E0Dya", | |
2568 "\\u1E0Dhya", | |
2569 // Not roundtrippable -- | |
2570 // \\u0939\\u094d\\u094d\\u092E - hma | |
2571 // \\u0939\\u094d\\u092E - hma | |
2572 // CharsToUnicodeString("hma"), | |
2573 "hya", | |
2574 "\\u015Br\\u0325", | |
2575 "\\u015Bca", | |
2576 "\\u0115", | |
2577 "san\\u0304j\\u012Bb s\\u0113nagupta", | |
2578 "\\u0101nand vaddir\\u0101ju", | |
2579 "\\u0101", | |
2580 "a" | |
2581 }; | |
2582 const char* const expected[MAX_LEN] = { | |
2583 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */ | |
2584 "\\u0915\\u094D\\u0930", /* kra */ | |
2585 "\\u0915\\u094D\\u0937", /* ks\\u0323a */ | |
2586 "\\u0916\\u094D\\u0930", /* khra */ | |
2587 "\\u0917\\u094D\\u0930", /* gra */ | |
2588 "\\u0919\\u094D\\u0930", /* n\\u0307ra */ | |
2589 "\\u091A\\u094D\\u0930", /* cra */ | |
2590 "\\u091B\\u094D\\u0930", /* chra */ | |
2591 "\\u091C\\u094D\\u091E", /* jn\\u0303a */ | |
2592 "\\u091D\\u094D\\u0930", /* jhra */ | |
2593 "\\u091E\\u094D\\u0930", /* n\\u0303ra */ | |
2594 "\\u091F\\u094D\\u092F", /* t\\u0323ya */ | |
2595 "\\u0920\\u094D\\u0930", /* t\\u0323hra */ | |
2596 "\\u0921\\u094D\\u092F", /* d\\u0323ya */ | |
2597 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid
in Devanagari | |
2598 "\\u0922\\u094D\\u092F", /* d\\u0323hya */ | |
2599 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */ | |
2600 "\\u0923\\u094D\\u0930", /* n\\u0323ra */ | |
2601 "\\u0924\\u094D\\u0924", /* tta */ | |
2602 "\\u0925\\u094D\\u0930", /* thra */ | |
2603 "\\u0926\\u094D\\u0926", /* dda */ | |
2604 "\\u0927\\u094D\\u0930", /* dhra */ | |
2605 "\\u0928\\u094D\\u0928", /* nna */ | |
2606 "\\u092A\\u094D\\u0930", /* pra */ | |
2607 "\\u092B\\u094D\\u0930", /* phra */ | |
2608 "\\u092C\\u094D\\u0930", /* bra */ | |
2609 "\\u092D\\u094D\\u0930", /* bhra */ | |
2610 "\\u092E\\u094D\\u0930", /* mra */ | |
2611 "\\u0929\\u094D\\u0930", /* n\\u0331ra */ | |
2612 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */ | |
2613 "\\u092F\\u094D\\u0930", /* yra */ | |
2614 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */ | |
2615 //"l-", | |
2616 "\\u0935\\u094D\\u0930", /* vra */ | |
2617 "\\u0936\\u094D\\u0930", /* s\\u0301ra */ | |
2618 "\\u0937\\u094D\\u0930", /* s\\u0323ra */ | |
2619 "\\u0938\\u094D\\u0930", /* sra */ | |
2620 "\\u0939\\u094d\\u092E", /* hma */ | |
2621 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */ | |
2622 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */ | |
2623 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/ | |
2624 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */ | |
2625 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */ | |
2626 "\\u091F\\u094D\\u092F", /* t\\u0323ya */ | |
2627 "\\u0920\\u094D\\u092F", /* t\\u0323hya */ | |
2628 "\\u0921\\u094D\\u092F", /* d\\u0323ya */ | |
2629 "\\u0922\\u094D\\u092F", /* d\\u0323hya */ | |
2630 // "hma", /* hma */ | |
2631 "\\u0939\\u094D\\u092F", /* hya */ | |
2632 "\\u0936\\u0943", /* s\\u0301r\\u0325a */ | |
2633 "\\u0936\\u094D\\u091A", /* s\\u0301ca */ | |
2634 "\\u090d", /* e\\u0306 */ | |
2635 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917
\\u0941\\u092A\\u094D\\u0924", | |
2636 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F
\\u0930\\u093E\\u091C\\u0941", | |
2637 "\\u0906", | |
2638 "\\u0905", | |
2639 }; | |
2640 UErrorCode status = U_ZERO_ERROR; | |
2641 UParseError parseError; | |
2642 UnicodeString message; | |
2643 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari"
, UTRANS_FORWARD, parseError, status); | |
2644 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin"
, UTRANS_FORWARD, parseError, status); | |
2645 if(U_FAILURE(status)){ | |
2646 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorN
ame(status)); | |
2647 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostConte
xt: " + prettify( parseError.postContext) ); | |
2648 return; | |
2649 } | |
2650 UnicodeString gotResult; | |
2651 for(int i= 0; i<MAX_LEN; i++){ | |
2652 gotResult = source[i]; | |
2653 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(
expected[i])); | |
2654 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeStrin
g(source[i])); | |
2655 } | |
2656 delete latinToDev; | |
2657 delete devToLatin; | |
2658 } | |
2659 | |
2660 void TransliteratorTest::TestTeluguLatinRT(){ | |
2661 const int MAX_LEN=10; | |
2662 const char* const source[MAX_LEN] = { | |
2663 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Ragh
uram Viswanadha */ | |
2664 "\\u0101nand vaddir\\u0101ju", /* Anan
d Vaddiraju */ | |
2665 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Raje
ev Kasarabada */ | |
2666 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanj
eev kasarabada */ | |
2667 "san\\u0304j\\u012Bb sen'gupta", /* sanj
ib sengupata */ | |
2668 "amar\\u0113ndra hanum\\u0101nula", /* Amar
endra hanumanula */ | |
2669 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi
Kumar Viswanadha */ | |
2670 "\\u0101ditya kandr\\u0113gula", /* Adit
ya Kandregula */ | |
2671 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shri
dhar Kantamsetty */ | |
2672 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madh
av Desetty */ | |
2673 }; | |
2674 | |
2675 const char* const expected[MAX_LEN] = { | |
2676 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36
\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", | |
2677 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F
\\u0C30\\u0C3E\\u0C1C\\u0C41", | |
2678 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c
\\u0c3e\\u0c26", | |
2679 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c
\\u0c3e\\u0c26", | |
2680 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d
\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24", | |
2681 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28
\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32", | |
2682 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c3
5\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", | |
2683 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D
\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32", | |
2684 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f
\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", | |
2685 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f
\\u0c4d\\u0c1f\\u0c3f", | |
2686 }; | |
2687 | |
2688 UErrorCode status = U_ZERO_ERROR; | |
2689 UParseError parseError; | |
2690 UnicodeString message; | |
2691 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UT
RANS_FORWARD, parseError, status); | |
2692 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UT
RANS_FORWARD, parseError, status); | |
2693 if(U_FAILURE(status)){ | |
2694 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorN
ame(status)); | |
2695 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostConte
xt: " + prettify( parseError.postContext) ); | |
2696 return; | |
2697 } | |
2698 UnicodeString gotResult; | |
2699 for(int i= 0; i<MAX_LEN; i++){ | |
2700 gotResult = source[i]; | |
2701 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(
expected[i])); | |
2702 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeStrin
g(source[i])); | |
2703 } | |
2704 delete latinToDev; | |
2705 delete devToLatin; | |
2706 } | |
2707 | |
2708 void TransliteratorTest::TestSanskritLatinRT(){ | |
2709 const int MAX_LEN =16; | |
2710 const char* const source[MAX_LEN] = { | |
2711 "rmk\\u1E63\\u0113t", | |
2712 "\\u015Br\\u012Bmad", | |
2713 "bhagavadg\\u012Bt\\u0101", | |
2714 "adhy\\u0101ya", | |
2715 "arjuna", | |
2716 "vi\\u1E63\\u0101da", | |
2717 "y\\u014Dga", | |
2718 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", | |
2719 "uv\\u0101cr\\u0325", | |
2720 "dharmak\\u1E63\\u0113tr\\u0113", | |
2721 "kuruk\\u1E63\\u0113tr\\u0113", | |
2722 "samav\\u0113t\\u0101", | |
2723 "yuyutsava\\u1E25", | |
2724 "m\\u0101mak\\u0101\\u1E25", | |
2725 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", | |
2726 "kimakurvata", | |
2727 "san\\u0304java", | |
2728 }; | |
2729 const char* const expected[MAX_LEN] = { | |
2730 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D"
, | |
2731 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", | |
2732 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", | |
2733 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", | |
2734 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", | |
2735 "\\u0935\\u093f\\u0937\\u093e\\u0926", | |
2736 "\\u092f\\u094b\\u0917", | |
2737 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930"
, | |
2738 "\\u0909\\u0935\\u093E\\u091A\\u0943", | |
2739 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\
\u0930\\u0947", | |
2740 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\
\u0930\\u0947", | |
2741 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", | |
2742 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", | |
2743 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", | |
2744 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u
0948\\u0935", | |
2745 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", | |
2746 "\\u0938\\u0902\\u091c\\u0935", | |
2747 }; | |
2748 UErrorCode status = U_ZERO_ERROR; | |
2749 UParseError parseError; | |
2750 UnicodeString message; | |
2751 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari"
, UTRANS_FORWARD, parseError, status); | |
2752 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin"
, UTRANS_FORWARD, parseError, status); | |
2753 if(U_FAILURE(status)){ | |
2754 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorN
ame(status)); | |
2755 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostConte
xt: " + prettify( parseError.postContext) ); | |
2756 return; | |
2757 } | |
2758 UnicodeString gotResult; | |
2759 for(int i= 0; i<MAX_LEN; i++){ | |
2760 gotResult = source[i]; | |
2761 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(
expected[i])); | |
2762 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeStrin
g(source[i])); | |
2763 } | |
2764 delete latinToDev; | |
2765 delete devToLatin; | |
2766 } | |
2767 | |
2768 | |
2769 void TransliteratorTest::TestCompoundLatinRT(){ | |
2770 const char* const source[] = { | |
2771 "rmk\\u1E63\\u0113t", | |
2772 "\\u015Br\\u012Bmad", | |
2773 "bhagavadg\\u012Bt\\u0101", | |
2774 "adhy\\u0101ya", | |
2775 "arjuna", | |
2776 "vi\\u1E63\\u0101da", | |
2777 "y\\u014Dga", | |
2778 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", | |
2779 "uv\\u0101cr\\u0325", | |
2780 "dharmak\\u1E63\\u0113tr\\u0113", | |
2781 "kuruk\\u1E63\\u0113tr\\u0113", | |
2782 "samav\\u0113t\\u0101", | |
2783 "yuyutsava\\u1E25", | |
2784 "m\\u0101mak\\u0101\\u1E25", | |
2785 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", | |
2786 "kimakurvata", | |
2787 "san\\u0304java" | |
2788 }; | |
2789 const int MAX_LEN = sizeof(source)/sizeof(source[0]); | |
2790 const char* const expected[MAX_LEN] = { | |
2791 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D"
, | |
2792 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", | |
2793 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", | |
2794 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", | |
2795 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", | |
2796 "\\u0935\\u093f\\u0937\\u093e\\u0926", | |
2797 "\\u092f\\u094b\\u0917", | |
2798 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930"
, | |
2799 "\\u0909\\u0935\\u093E\\u091A\\u0943", | |
2800 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\
\u0930\\u0947", | |
2801 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\
\u0930\\u0947", | |
2802 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", | |
2803 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", | |
2804 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", | |
2805 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\
\u0948\\u0935", | |
2806 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", | |
2807 "\\u0938\\u0902\\u091c\\u0935" | |
2808 }; | |
2809 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) { | |
2810 errln("error in TestCompoundLatinRT: source[] and expected[] have differ
ent lengths!"); | |
2811 return; | |
2812 } | |
2813 | |
2814 UErrorCode status = U_ZERO_ERROR; | |
2815 UParseError parseError; | |
2816 UnicodeString message; | |
2817 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari
-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); | |
2818 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Deva
nagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); | |
2819 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari
-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status); | |
2820 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telu
gu;Telugu-Latin", UTRANS_FORWARD, parseError, status); | |
2821 | |
2822 if(U_FAILURE(status)){ | |
2823 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorN
ame(status)); | |
2824 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostConte
xt: " + prettify( parseError.postContext) ); | |
2825 return; | |
2826 } | |
2827 UnicodeString gotResult; | |
2828 for(int i= 0; i<MAX_LEN; i++){ | |
2829 gotResult = source[i]; | |
2830 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicode
String(expected[i])); | |
2831 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicode
String(source[i])); | |
2832 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicode
String(source[i])); | |
2833 | |
2834 } | |
2835 delete(latinToDevToLatin); | |
2836 delete(devToLatinToDev); | |
2837 delete(devToTelToDev); | |
2838 delete(latinToTelToLatin); | |
2839 } | |
2840 | |
2841 /** | |
2842 * Test Gurmukhi-Devanagari Tippi and Bindi | |
2843 */ | |
2844 void TransliteratorTest::TestGurmukhiDevanagari(){ | |
2845 // the rule says: | |
2846 // (\u0902) (when preceded by vowel) ---> (\u0A02) | |
2847 // (\u0902) (when preceded by consonant) ---> (\u0A70) | |
2848 UErrorCode status = U_ZERO_ERROR; | |
2849 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u091
4 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(),
status); | |
2850 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, U
S_INV).unescape(), status); | |
2851 UParseError parseError; | |
2852 | |
2853 UnicodeSetIterator vIter(vowel); | |
2854 UnicodeSetIterator nvIter(non_vowel); | |
2855 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi"
,UTRANS_FORWARD, parseError, status); | |
2856 if(U_FAILURE(status)) { | |
2857 dataerrln("Error creating transliterator %s", u_errorName(status)); | |
2858 delete trans; | |
2859 return; | |
2860 } | |
2861 UnicodeString src (" \\u0902", -1, US_INV); | |
2862 UnicodeString expected(" \\u0A02", -1, US_INV); | |
2863 src = src.unescape(); | |
2864 expected= expected.unescape(); | |
2865 | |
2866 while(vIter.next()){ | |
2867 src.setCharAt(0,(UChar) vIter.getCodepoint()); | |
2868 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100)); | |
2869 expect(*trans,src,expected); | |
2870 } | |
2871 | |
2872 expected.setCharAt(1,0x0A70); | |
2873 while(nvIter.next()){ | |
2874 //src.setCharAt(0,(char) nvIter.codepoint); | |
2875 src.setCharAt(0,(UChar)nvIter.getCodepoint()); | |
2876 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100)); | |
2877 expect(*trans,src,expected); | |
2878 } | |
2879 delete trans; | |
2880 } | |
2881 /** | |
2882 * Test instantiation from a locale. | |
2883 */ | |
2884 void TransliteratorTest::TestLocaleInstantiation(void) { | |
2885 UParseError pe; | |
2886 UErrorCode ec = U_ZERO_ERROR; | |
2887 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FOR
WARD, pe, ec); | |
2888 if (U_FAILURE(ec)) { | |
2889 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec)); | |
2890 delete t; | |
2891 return; | |
2892 } | |
2893 expect(*t, CharsToUnicodeString("\\u0430"), "a"); | |
2894 delete t; | |
2895 | |
2896 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec); | |
2897 if (U_FAILURE(ec)) { | |
2898 errln("FAIL: createInstance(en-el)"); | |
2899 delete t; | |
2900 return; | |
2901 } | |
2902 expect(*t, "a", CharsToUnicodeString("\\u03B1")); | |
2903 delete t; | |
2904 } | |
2905 | |
2906 /** | |
2907 * Test title case handling of accent (should ignore accents) | |
2908 */ | |
2909 void TransliteratorTest::TestTitleAccents(void) { | |
2910 UParseError pe; | |
2911 UErrorCode ec = U_ZERO_ERROR; | |
2912 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD,
pe, ec); | |
2913 if (U_FAILURE(ec)) { | |
2914 errln("FAIL: createInstance(Title)"); | |
2915 delete t; | |
2916 return; | |
2917 } | |
2918 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString
("A\\u0300b Can't Abe")); | |
2919 delete t; | |
2920 } | |
2921 | |
2922 /** | |
2923 * Basic test of a locale resource based rule. | |
2924 */ | |
2925 void TransliteratorTest::TestLocaleResource() { | |
2926 const char* DATA[] = { | |
2927 // id from to | |
2928 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0", | |
2929 "Latin-el", "b", "\\u03bc\\u03c0", | |
2930 "Latin-Greek", "b", "\\u03B2", | |
2931 "Greek-Latin/UNGEGN", "\\u03B2", "v", | |
2932 "el-Latin", "\\u03B2", "v", | |
2933 "Greek-Latin", "\\u03B2", "b", | |
2934 }; | |
2935 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); | |
2936 for (int32_t i=0; i<DATA_length; i+=3) { | |
2937 UParseError pe; | |
2938 UErrorCode ec = U_ZERO_ERROR; | |
2939 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWA
RD, pe, ec); | |
2940 if (U_FAILURE(ec)) { | |
2941 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - "
+ u_errorName(ec)); | |
2942 delete t; | |
2943 continue; | |
2944 } | |
2945 expect(*t, CharsToUnicodeString(DATA[i+1]), | |
2946 CharsToUnicodeString(DATA[i+2])); | |
2947 delete t; | |
2948 } | |
2949 } | |
2950 | |
2951 /** | |
2952 * Make sure parse errors reference the right line. | |
2953 */ | |
2954 void TransliteratorTest::TestParseError() { | |
2955 static const char* rule = | |
2956 "a > b;\n" | |
2957 "# more stuff\n" | |
2958 "d << b;"; | |
2959 UErrorCode ec = U_ZERO_ERROR; | |
2960 UParseError pe; | |
2961 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWA
RD, pe, ec); | |
2962 delete t; | |
2963 if (U_FAILURE(ec)) { | |
2964 UnicodeString err(pe.preContext); | |
2965 err.append((UChar)124/*|*/).append(pe.postContext); | |
2966 if (err.indexOf("d << b") >= 0) { | |
2967 logln("Ok: " + err); | |
2968 } else { | |
2969 errln("FAIL: " + err); | |
2970 } | |
2971 } | |
2972 else { | |
2973 errln("FAIL: no syntax error"); | |
2974 } | |
2975 static const char* maskingRule = | |
2976 "a>x;\n" | |
2977 "# more stuff\n" | |
2978 "ab>y;"; | |
2979 ec = U_ZERO_ERROR; | |
2980 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe
, ec); | |
2981 if (ec != U_RULE_MASK_ERROR) { | |
2982 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec))
; | |
2983 } | |
2984 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) { | |
2985 errln("FAIL: did not get expected precontext"); | |
2986 } | |
2987 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) { | |
2988 errln("FAIL: did not get expected postcontext"); | |
2989 } | |
2990 } | |
2991 | |
2992 /** | |
2993 * Make sure sets on output are disallowed. | |
2994 */ | |
2995 void TransliteratorTest::TestOutputSet() { | |
2996 UnicodeString rule = "$set = [a-cm-n]; b > $set;"; | |
2997 UErrorCode ec = U_ZERO_ERROR; | |
2998 UParseError pe; | |
2999 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWA
RD, pe, ec); | |
3000 delete t; | |
3001 if (U_FAILURE(ec)) { | |
3002 UnicodeString err(pe.preContext); | |
3003 err.append((UChar)124/*|*/).append(pe.postContext); | |
3004 logln("Ok: " + err); | |
3005 return; | |
3006 } | |
3007 errln("FAIL: No syntax error"); | |
3008 } | |
3009 | |
3010 /** | |
3011 * Test the use variable range pragma, making sure that use of | |
3012 * variable range characters is detected and flagged as an error. | |
3013 */ | |
3014 void TransliteratorTest::TestVariableRange() { | |
3015 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; | |
3016 UErrorCode ec = U_ZERO_ERROR; | |
3017 UParseError pe; | |
3018 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWA
RD, pe, ec); | |
3019 delete t; | |
3020 if (U_FAILURE(ec)) { | |
3021 UnicodeString err(pe.preContext); | |
3022 err.append((UChar)124/*|*/).append(pe.postContext); | |
3023 logln("Ok: " + err); | |
3024 return; | |
3025 } | |
3026 errln("FAIL: No syntax error"); | |
3027 } | |
3028 | |
3029 /** | |
3030 * Test invalid post context error handling | |
3031 */ | |
3032 void TransliteratorTest::TestInvalidPostContext() { | |
3033 UnicodeString rule = "a}b{c>d;"; | |
3034 UErrorCode ec = U_ZERO_ERROR; | |
3035 UParseError pe; | |
3036 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWA
RD, pe, ec); | |
3037 delete t; | |
3038 if (U_FAILURE(ec)) { | |
3039 UnicodeString err(pe.preContext); | |
3040 err.append((UChar)124/*|*/).append(pe.postContext); | |
3041 if (err.indexOf("a}b{c") >= 0) { | |
3042 logln("Ok: " + err); | |
3043 } else { | |
3044 errln("FAIL: " + err); | |
3045 } | |
3046 return; | |
3047 } | |
3048 errln("FAIL: No syntax error"); | |
3049 } | |
3050 | |
3051 /** | |
3052 * Test ID form variants | |
3053 */ | |
3054 void TransliteratorTest::TestIDForms() { | |
3055 const char* DATA[] = { | |
3056 "NFC", NULL, "NFD", | |
3057 "nfd", NULL, "NFC", // make sure case is ignored | |
3058 "Any-NFKD", NULL, "Any-NFKC", | |
3059 "Null", NULL, "Null", | |
3060 "-nfkc", "nfkc", "NFKD", | |
3061 "-nfkc/", "nfkc", "NFKD", | |
3062 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN", | |
3063 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", | |
3064 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", | |
3065 "Source-", NULL, NULL, | |
3066 "Source/Variant-", NULL, NULL, | |
3067 "Source-/Variant", NULL, NULL, | |
3068 "/Variant", NULL, NULL, | |
3069 "/Variant-", NULL, NULL, | |
3070 "-/Variant", NULL, NULL, | |
3071 "-/", NULL, NULL, | |
3072 "-", NULL, NULL, | |
3073 "/", NULL, NULL, | |
3074 }; | |
3075 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); | |
3076 | |
3077 for (int32_t i=0; i<DATA_length; i+=3) { | |
3078 const char* ID = DATA[i]; | |
3079 const char* expID = DATA[i+1]; | |
3080 const char* expInvID = DATA[i+2]; | |
3081 UBool expValid = (expInvID != NULL); | |
3082 if (expID == NULL) { | |
3083 expID = ID; | |
3084 } | |
3085 UParseError pe; | |
3086 UErrorCode ec = U_ZERO_ERROR; | |
3087 Transliterator *t = | |
3088 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); | |
3089 if (U_FAILURE(ec)) { | |
3090 if (!expValid) { | |
3091 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorN
ame(ec)); | |
3092 } else { | |
3093 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " +
u_errorName(ec)); | |
3094 } | |
3095 delete t; | |
3096 continue; | |
3097 } | |
3098 Transliterator *u = t->createInverse(ec); | |
3099 if (U_FAILURE(ec)) { | |
3100 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID); | |
3101 delete t; | |
3102 delete u; | |
3103 continue; | |
3104 } | |
3105 if (t->getID() == expID && | |
3106 u->getID() == expInvID) { | |
3107 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID); | |
3108 } else { | |
3109 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " + | |
3110 t->getID() + " x getInverse() => " + u->getID() + | |
3111 ", expected " + expInvID); | |
3112 } | |
3113 delete t; | |
3114 delete u; | |
3115 } | |
3116 } | |
3117 | |
3118 static const UChar SPACE[] = {32,0}; | |
3119 static const UChar NEWLINE[] = {10,0}; | |
3120 static const UChar RETURN[] = {13,0}; | |
3121 static const UChar EMPTY[] = {0}; | |
3122 | |
3123 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator&
t2, | |
3124 const UnicodeString& testRulesForward) { | |
3125 UnicodeString rules2; t2.toRules(rules2, TRUE); | |
3126 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); | |
3127 rules2.findAndReplace(SPACE, EMPTY); | |
3128 rules2.findAndReplace(NEWLINE, EMPTY); | |
3129 rules2.findAndReplace(RETURN, EMPTY); | |
3130 | |
3131 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, E
MPTY); | |
3132 | |
3133 if (rules2 != testRules) { | |
3134 errln(label); | |
3135 logln((UnicodeString)"GENERATED RULES: " + rules2); | |
3136 logln((UnicodeString)"SHOULD BE: " + testRulesForward); | |
3137 } | |
3138 } | |
3139 | |
3140 /** | |
3141 * Mark's toRules test. | |
3142 */ | |
3143 void TransliteratorTest::TestToRulesMark() { | |
3144 const char* testRules = | |
3145 "::[[:Latin:][:Mark:]];" | |
3146 "::NFKD (NFC);" | |
3147 "::Lower (Lower);" | |
3148 "a <> \\u03B1;" // alpha | |
3149 "::NFKC (NFD);" | |
3150 "::Upper (Lower);" | |
3151 "::Lower ();" | |
3152 "::([[:Greek:][:Mark:]]);" | |
3153 ; | |
3154 const char* testRulesForward = | |
3155 "::[[:Latin:][:Mark:]];" | |
3156 "::NFKD(NFC);" | |
3157 "::Lower(Lower);" | |
3158 "a > \\u03B1;" | |
3159 "::NFKC(NFD);" | |
3160 "::Upper (Lower);" | |
3161 "::Lower ();" | |
3162 ; | |
3163 const char* testRulesBackward = | |
3164 "::[[:Greek:][:Mark:]];" | |
3165 "::Lower (Upper);" | |
3166 "::NFD(NFKC);" | |
3167 "\\u03B1 > a;" | |
3168 "::Lower(Lower);" | |
3169 "::NFC(NFKD);" | |
3170 ; | |
3171 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute | |
3172 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute | |
3173 | |
3174 UParseError pe; | |
3175 UErrorCode ec = U_ZERO_ERROR; | |
3176 Transliterator *t2 = Transliterator::createFromRules("source-target", Unicod
eString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec); | |
3177 Transliterator *t3 = Transliterator::createFromRules("target-source", Unicod
eString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec); | |
3178 | |
3179 if (U_FAILURE(ec)) { | |
3180 delete t2; | |
3181 delete t3; | |
3182 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec)); | |
3183 return; | |
3184 } | |
3185 | |
3186 expect(*t2, source, target); | |
3187 expect(*t3, target, source); | |
3188 | |
3189 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1
, US_INV)); | |
3190 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward,
-1, US_INV)); | |
3191 | |
3192 delete t2; | |
3193 delete t3; | |
3194 } | |
3195 | |
3196 /** | |
3197 * Test Escape and Unescape transliterators. | |
3198 */ | |
3199 void TransliteratorTest::TestEscape() { | |
3200 UParseError pe; | |
3201 UErrorCode ec; | |
3202 Transliterator *t; | |
3203 | |
3204 ec = U_ZERO_ERROR; | |
3205 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec); | |
3206 if (U_FAILURE(ec)) { | |
3207 errln((UnicodeString)"FAIL: createInstance"); | |
3208 } else { | |
3209 expect(*t, | |
3210 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"), | |
3211 "@12Q"); | |
3212 } | |
3213 delete t; | |
3214 | |
3215 ec = U_ZERO_ERROR; | |
3216 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec); | |
3217 if (U_FAILURE(ec)) { | |
3218 errln((UnicodeString)"FAIL: createInstance"); | |
3219 } else { | |
3220 expect(*t, | |
3221 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), | |
3222 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED")); | |
3223 } | |
3224 delete t; | |
3225 | |
3226 ec = U_ZERO_ERROR; | |
3227 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec); | |
3228 if (U_FAILURE(ec)) { | |
3229 errln((UnicodeString)"FAIL: createInstance"); | |
3230 } else { | |
3231 expect(*t, | |
3232 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), | |
3233 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED")); | |
3234 } | |
3235 delete t; | |
3236 | |
3237 ec = U_ZERO_ERROR; | |
3238 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec); | |
3239 if (U_FAILURE(ec)) { | |
3240 errln((UnicodeString)"FAIL: createInstance"); | |
3241 } else { | |
3242 expect(*t, | |
3243 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), | |
3244 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}")); | |
3245 } | |
3246 delete t; | |
3247 } | |
3248 | |
3249 | |
3250 void TransliteratorTest::TestAnchorMasking(){ | |
3251 UnicodeString rule ("^a > Q; a > q;"); | |
3252 UErrorCode status= U_ZERO_ERROR; | |
3253 UParseError parseError; | |
3254 | |
3255 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWA
RD,parseError,status); | |
3256 if(U_FAILURE(status)){ | |
3257 errln(UnicodeString("FAIL: ") + "ID" + | |
3258 ".createFromRules() => bad rules" + | |
3259 /*", parse error " + parseError.code +*/ | |
3260 ", line " + parseError.line + | |
3261 ", offset " + parseError.offset + | |
3262 ", context " + prettify(parseError.preContext, TRUE) + | |
3263 ", rules: " + prettify(rule, TRUE)); | |
3264 } | |
3265 delete t; | |
3266 } | |
3267 | |
3268 /** | |
3269 * Make sure display names of variants look reasonable. | |
3270 */ | |
3271 void TransliteratorTest::TestDisplayName() { | |
3272 #if UCONFIG_NO_FORMATTING | |
3273 logln("Skipping, UCONFIG_NO_FORMATTING is set\n"); | |
3274 return; | |
3275 #else | |
3276 static const char* DATA[] = { | |
3277 // ID, forward name, reverse name | |
3278 // Update the text as necessary -- the important thing is | |
3279 // not the text itself, but how various cases are handled. | |
3280 | |
3281 // Basic test | |
3282 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", | |
3283 | |
3284 // Variants | |
3285 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", | |
3286 | |
3287 // Target-only IDs | |
3288 "NFC", "Any to NFC", "Any to NFD", | |
3289 }; | |
3290 | |
3291 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); | |
3292 | |
3293 Locale US("en", "US"); | |
3294 | |
3295 for (int32_t i=0; i<DATA_length; i+=3) { | |
3296 UnicodeString name; | |
3297 Transliterator::getDisplayName(DATA[i], US, name); | |
3298 if (name != DATA[i+1]) { | |
3299 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() =>
" + | |
3300 name + ", expected " + DATA[i+1]); | |
3301 } else { | |
3302 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + na
me); | |
3303 } | |
3304 UErrorCode ec = U_ZERO_ERROR; | |
3305 UParseError pe; | |
3306 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVER
SE, pe, ec); | |
3307 if (U_FAILURE(ec)) { | |
3308 delete t; | |
3309 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec)); | |
3310 continue; | |
3311 } | |
3312 name = Transliterator::getDisplayName(t->getID(), US, name); | |
3313 if (name != DATA[i+2]) { | |
3314 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName()
=> " + | |
3315 name + ", expected " + DATA[i+2]); | |
3316 } else { | |
3317 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " +
name); | |
3318 } | |
3319 delete t; | |
3320 } | |
3321 #endif | |
3322 } | |
3323 | |
3324 void TransliteratorTest::TestSpecialCases(void) { | |
3325 const UnicodeString registerRules[] = { | |
3326 "Any-Dev1", "x > X; y > Y;", | |
3327 "Any-Dev2", "XY > Z", | |
3328 "Greek-Latin/FAKE", | |
3329 CharsToUnicodeString | |
3330 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]
] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][
\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"), | |
3331 "" // END MARKER | |
3332 }; | |
3333 | |
3334 const UnicodeString testCases[] = { | |
3335 // NORMALIZATION | |
3336 // should add more test cases | |
3337 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\u
FF9E\\u03D3"), "", | |
3338 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\u
FF9E\\u03D3"), "", | |
3339 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\u
FF9E\\u03D3"), "", | |
3340 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\u
FF9E\\u03D3"), "", | |
3341 | |
3342 // mp -> b BUG | |
3343 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", | |
3344 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", | |
3345 | |
3346 // check for devanagari bug | |
3347 "nfd;Dev1;Dev2;nfc", "xy", "Z", | |
3348 | |
3349 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE | |
3350 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\
u01C9 ") + DESERET_dee + DESERET_DEE, | |
3351 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\
\u01C9 ") + DESERET_DEE + DESERET_dee, | |
3352 | |
3353 //TODO: enable this test once Titlecase works right | |
3354 /* | |
3355 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u
01C9 ") + DESERET_dee + DESERET_DEE, | |
3356 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + D
ESERET_DEE + DESERET_dee, | |
3357 */ | |
3358 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u0
1C8\\u01C9 ") + DESERET_dee + DESERET_DEE, | |
3359 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7
") + DESERET_DEE + DESERET_DEE, | |
3360 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u0
1C8\\u01C9 ") + DESERET_dee + DESERET_DEE, | |
3361 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u
01C9\\u01C9 ") + DESERET_dee + DESERET_dee, | |
3362 | |
3363 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u0
1C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "", | |
3364 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u0
1C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "", | |
3365 | |
3366 // FORMS OF S | |
3367 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u0
3C2\\u03C3"), | |
3368 CharsToUnicodeString("s ss s\\u0331s\\u0331") , | |
3369 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"), | |
3370 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u0
3C2\\u03C3") , | |
3371 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u0
3C3"), | |
3372 CharsToUnicodeString("s ss s\\u0331s\\u0331") , | |
3373 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"), | |
3374 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u0
3C3"), | |
3375 // Tatiana bug | |
3376 // Upper: TAT\\u02B9\\u00C2NA | |
3377 // Lower: tat\\u02B9\\u00E2na | |
3378 // Title: Tat\\u02B9\\u00E2na | |
3379 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"), | |
3380 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), | |
3381 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), | |
3382 CharsToUnicodeString("tat\\u02B9\\u00E2na"), | |
3383 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"), | |
3384 CharsToUnicodeString("Tat\\u02B9\\u00E2na"), | |
3385 | |
3386 "" // END MARKER | |
3387 }; | |
3388 | |
3389 UParseError pos; | |
3390 int32_t i; | |
3391 for (i = 0; registerRules[i].length()!=0; i+=2) { | |
3392 UErrorCode status = U_ZERO_ERROR; | |
3393 | |
3394 Transliterator *t = Transliterator::createFromRules(registerRules[0+i], | |
3395 registerRules[i+1], UTRANS_FORWARD, pos, status); | |
3396 if (U_FAILURE(status)) { | |
3397 dataerrln("Fails: Unable to create the transliterator from rules. -
%s", u_errorName(status)); | |
3398 } else { | |
3399 Transliterator::registerInstance(t); | |
3400 } | |
3401 } | |
3402 for (i = 0; testCases[i].length()!=0; i+=3) { | |
3403 UErrorCode ec = U_ZERO_ERROR; | |
3404 UParseError pe; | |
3405 const UnicodeString& name = testCases[i]; | |
3406 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD,
pe, ec); | |
3407 if (U_FAILURE(ec)) { | |
3408 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u
_errorName(ec)); | |
3409 delete t; | |
3410 continue; | |
3411 } | |
3412 const UnicodeString& id = t->getID(); | |
3413 const UnicodeString& source = testCases[i+1]; | |
3414 UnicodeString target; | |
3415 | |
3416 // Automatic generation of targets, to make it simpler to add test cases
(and more fail-safe) | |
3417 | |
3418 if (testCases[i+2].length() > 0) { | |
3419 target = testCases[i+2]; | |
3420 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) { | |
3421 Normalizer::normalize(source, UNORM_NFD, 0, target, ec); | |
3422 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) { | |
3423 Normalizer::normalize(source, UNORM_NFC, 0, target, ec); | |
3424 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) { | |
3425 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec); | |
3426 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) { | |
3427 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec); | |
3428 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) { | |
3429 target = source; | |
3430 target.toLower(Locale::getUS()); | |
3431 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) { | |
3432 target = source; | |
3433 target.toUpper(Locale::getUS()); | |
3434 } | |
3435 if (U_FAILURE(ec)) { | |
3436 errln((UnicodeString)"FAIL: Internal error normalizing " + source); | |
3437 continue; | |
3438 } | |
3439 | |
3440 expect(*t, source, target); | |
3441 delete t; | |
3442 } | |
3443 for (i = 0; registerRules[i].length()!=0; i+=2) { | |
3444 Transliterator::unregister(registerRules[i]); | |
3445 } | |
3446 } | |
3447 | |
3448 char* Char32ToEscapedChars(UChar32 ch, char* buffer) { | |
3449 if (ch <= 0xFFFF) { | |
3450 sprintf(buffer, "\\u%04x", (int)ch); | |
3451 } else { | |
3452 sprintf(buffer, "\\U%08x", (int)ch); | |
3453 } | |
3454 return buffer; | |
3455 } | |
3456 | |
3457 void TransliteratorTest::TestSurrogateCasing (void) { | |
3458 // check that casing handles surrogates | |
3459 // titlecase is currently defective | |
3460 char buffer[20]; | |
3461 UChar buffer2[20]; | |
3462 UChar32 dee; | |
3463 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee); | |
3464 UnicodeString DEE(u_totitle(dee)); | |
3465 if (DEE != DESERET_DEE) { | |
3466 err("Fails titlecase of surrogates"); | |
3467 err(Char32ToEscapedChars(dee, buffer)); | |
3468 err(", "); | |
3469 errln(Char32ToEscapedChars(DEE.char32At(0), buffer)); | |
3470 } | |
3471 | |
3472 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE; | |
3473 UnicodeString deedeeTest = DESERET_dee + DESERET_dee; | |
3474 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE; | |
3475 UErrorCode status= U_ZERO_ERROR; | |
3476 | |
3477 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL,
&status); | |
3478 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) { | |
3479 errln("Fails: Can't uppercase surrogates."); | |
3480 } | |
3481 | |
3482 status= U_ZERO_ERROR; | |
3483 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL,
&status); | |
3484 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) { | |
3485 errln("Fails: Can't lowercase surrogates."); | |
3486 } | |
3487 } | |
3488 | |
3489 static void _trans(Transliterator& t, const UnicodeString& src, | |
3490 UnicodeString& result) { | |
3491 result = src; | |
3492 t.transliterate(result); | |
3493 } | |
3494 | |
3495 static void _trans(const UnicodeString& id, const UnicodeString& src, | |
3496 UnicodeString& result, UErrorCode ec) { | |
3497 UParseError pe; | |
3498 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, e
c); | |
3499 if (U_SUCCESS(ec)) { | |
3500 _trans(*t, src, result); | |
3501 } | |
3502 delete t; | |
3503 } | |
3504 | |
3505 static UnicodeString _findMatch(const UnicodeString& source, | |
3506 const UnicodeString* pairs) { | |
3507 UnicodeString empty; | |
3508 for (int32_t i=0; pairs[i].length() > 0; i+=2) { | |
3509 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) { | |
3510 return pairs[i+1]; | |
3511 } | |
3512 } | |
3513 return empty; | |
3514 } | |
3515 | |
3516 // Check to see that incremental gets at least part way through a reasonable str
ing. | |
3517 | |
3518 void TransliteratorTest::TestIncrementalProgress(void) { | |
3519 UErrorCode ec = U_ZERO_ERROR; | |
3520 UnicodeString latinTest = "The Quick Brown Fox."; | |
3521 UnicodeString devaTest; | |
3522 _trans("Latin-Devanagari", latinTest, devaTest, ec); | |
3523 UnicodeString kataTest; | |
3524 _trans("Latin-Katakana", latinTest, kataTest, ec); | |
3525 if (U_FAILURE(ec)) { | |
3526 errln("FAIL: Internal error"); | |
3527 return; | |
3528 } | |
3529 const UnicodeString tests[] = { | |
3530 "Any", latinTest, | |
3531 "Latin", latinTest, | |
3532 "Halfwidth", latinTest, | |
3533 "Devanagari", devaTest, | |
3534 "Katakana", kataTest, | |
3535 "" // END MARKER | |
3536 }; | |
3537 | |
3538 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog."); | |
3539 int32_t i = 0, j=0, k=0; | |
3540 int32_t sources = Transliterator::countAvailableSources(); | |
3541 for (i = 0; i < sources; i++) { | |
3542 UnicodeString source; | |
3543 Transliterator::getAvailableSource(i, source); | |
3544 UnicodeString test = _findMatch(source, tests); | |
3545 if (test.length() == 0) { | |
3546 logln((UnicodeString)"Skipping " + source + "-X"); | |
3547 continue; | |
3548 } | |
3549 int32_t targets = Transliterator::countAvailableTargets(source); | |
3550 for (j = 0; j < targets; j++) { | |
3551 UnicodeString target; | |
3552 Transliterator::getAvailableTarget(j, source, target); | |
3553 int32_t variants = Transliterator::countAvailableVariants(source, ta
rget); | |
3554 for (k =0; k< variants; k++) { | |
3555 UnicodeString variant; | |
3556 UParseError err; | |
3557 UErrorCode status = U_ZERO_ERROR; | |
3558 | |
3559 Transliterator::getAvailableVariant(k, source, target, variant); | |
3560 UnicodeString id = source + "-" + target + "/" + variant; | |
3561 | |
3562 Transliterator *t = Transliterator::createInstance(id, UTRANS_FO
RWARD, err, status); | |
3563 if (U_FAILURE(status)) { | |
3564 dataerrln((UnicodeString)"FAIL: Could not create " + id); | |
3565 delete t; | |
3566 continue; | |
3567 } | |
3568 status = U_ZERO_ERROR; | |
3569 CheckIncrementalAux(t, test); | |
3570 | |
3571 UnicodeString rev; | |
3572 _trans(*t, test, rev); | |
3573 Transliterator *inv = t->createInverse(status); | |
3574 if (U_FAILURE(status)) { | |
3575 #if UCONFIG_NO_BREAK_ITERATION | |
3576 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai shoul
d fail. | |
3577 if (id.compare((UnicodeString)"Latin-Thai/") != 0) | |
3578 #endif | |
3579 errln((UnicodeString)"FAIL: Could not create inverse of
" + id); | |
3580 | |
3581 delete t; | |
3582 delete inv; | |
3583 continue; | |
3584 } | |
3585 CheckIncrementalAux(inv, rev); | |
3586 delete t; | |
3587 delete inv; | |
3588 } | |
3589 } | |
3590 } | |
3591 } | |
3592 | |
3593 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t, | |
3594 const UnicodeString& input
) { | |
3595 UErrorCode ec = U_ZERO_ERROR; | |
3596 UTransPosition pos; | |
3597 UnicodeString test = input; | |
3598 | |
3599 pos.contextStart = 0; | |
3600 pos.contextLimit = input.length(); | |
3601 pos.start = 0; | |
3602 pos.limit = input.length(); | |
3603 | |
3604 t->transliterate(test, pos, ec); | |
3605 if (U_FAILURE(ec)) { | |
3606 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec)); | |
3607 return; | |
3608 } | |
3609 UBool gotError = FALSE; | |
3610 (void)gotError; // Suppress set but not used warning. | |
3611 | |
3612 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit)
and U+XXXXX?X? | |
3613 | |
3614 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") { | |
3615 errln((UnicodeString)"No Progress, " + | |
3616 t->getID() + ": " + formatInput(test, input, pos)); | |
3617 gotError = TRUE; | |
3618 } else { | |
3619 logln((UnicodeString)"PASS Progress, " + | |
3620 t->getID() + ": " + formatInput(test, input, pos)); | |
3621 } | |
3622 t->finishTransliteration(test, pos); | |
3623 if (pos.start != pos.limit) { | |
3624 errln((UnicodeString)"Incomplete, " + | |
3625 t->getID() + ": " + formatInput(test, input, pos)); | |
3626 gotError = TRUE; | |
3627 } | |
3628 } | |
3629 | |
3630 void TransliteratorTest::TestFunction() { | |
3631 // Careful with spacing and ';' here: Phrase this exactly | |
3632 // as toRules() is going to return it. If toRules() changes | |
3633 // with regard to spacing or ';', then adjust this string. | |
3634 UnicodeString rule = | |
3635 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; | |
3636 | |
3637 UParseError pe; | |
3638 UErrorCode ec = U_ZERO_ERROR; | |
3639 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FOR
WARD, pe, ec); | |
3640 if (t == NULL) { | |
3641 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec)); | |
3642 return; | |
3643 } | |
3644 | |
3645 UnicodeString r; | |
3646 t->toRules(r, TRUE); | |
3647 if (r == rule) { | |
3648 logln((UnicodeString)"OK: toRules() => " + r); | |
3649 } else { | |
3650 errln((UnicodeString)"FAIL: toRules() => " + r + | |
3651 ", expected " + rule); | |
3652 } | |
3653 | |
3654 expect(*t, "The Quick Brown Fox", | |
3655 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)ro
wn F(f=\\u0066)ox")); | |
3656 | |
3657 delete t; | |
3658 } | |
3659 | |
3660 void TransliteratorTest::TestInvalidBackRef(void) { | |
3661 UnicodeString rule = ". > $1;"; | |
3662 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1)
; . > $1; [{}] >\\u0020;"); | |
3663 UParseError pe; | |
3664 UErrorCode ec = U_ZERO_ERROR; | |
3665 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FOR
WARD, pe, ec); | |
3666 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_
FORWARD, pe, ec); | |
3667 | |
3668 if (t != NULL) { | |
3669 errln("FAIL: createFromRules should have returned NULL"); | |
3670 delete t; | |
3671 } | |
3672 | |
3673 if (t2 != NULL) { | |
3674 errln("FAIL: createFromRules should have returned NULL"); | |
3675 delete t2; | |
3676 } | |
3677 | |
3678 if (U_SUCCESS(ec)) { | |
3679 errln("FAIL: Ok: . > $1; => no error"); | |
3680 } else { | |
3681 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec)); | |
3682 } | |
3683 } | |
3684 | |
3685 void TransliteratorTest::TestMulticharStringSet() { | |
3686 // Basic testing | |
3687 const char* rule = | |
3688 " [{aa}] > x;" | |
3689 " a > y;" | |
3690 " [b{bc}] > z;" | |
3691 "[{gd}] { e > q;" | |
3692 " e } [{fg}] > r;" ; | |
3693 | |
3694 UParseError pe; | |
3695 UErrorCode ec = U_ZERO_ERROR; | |
3696 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FOR
WARD, pe, ec); | |
3697 if (t == NULL || U_FAILURE(ec)) { | |
3698 delete t; | |
3699 errln("FAIL: createFromRules failed"); | |
3700 return; | |
3701 } | |
3702 | |
3703 expect(*t, "a aa ab bc d gd de gde gdefg ddefg", | |
3704 "y x yz z d gd de gdq gdqfg ddrfg"); | |
3705 delete t; | |
3706 | |
3707 // Overlapped string test. Make sure that when multiple | |
3708 // strings can match that the longest one is matched. | |
3709 rule = | |
3710 " [a {ab} {abc}] > x;" | |
3711 " b > y;" | |
3712 " c > z;" | |
3713 " q [t {st} {rst}] { e > p;" ; | |
3714 | |
3715 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); | |
3716 if (t == NULL || U_FAILURE(ec)) { | |
3717 delete t; | |
3718 errln("FAIL: createFromRules failed"); | |
3719 return; | |
3720 } | |
3721 | |
3722 expect(*t, "a ab abc qte qste qrste", | |
3723 "x x x qtp qstp qrstp"); | |
3724 delete t; | |
3725 } | |
3726 | |
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv | |
3728 // BEGIN TestUserFunction support factory | |
3729 | |
// Prototype transliterators for the TestUserFunction factory, indexed by the
// integer token supplied at registration; _TUFFactory() returns clones of
// these and _TUFUnreg() deletes them.
Transliterator* _TUFF[4];
// Heap-allocated copies of the IDs under which the matching _TUFF entries
// were registered; used by _TUFUnreg() to unregister them.
UnicodeString* _TUFID[4];
3732 | |
3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/, | |
3734 Transliterator::Token context) { | |
3735 return _TUFF[context.integer]->clone(); | |
3736 } | |
3737 | |
3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) { | |
3739 _TUFF[n] = t; | |
3740 _TUFID[n] = new UnicodeString(ID); | |
3741 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToke
n(n)); | |
3742 } | |
3743 | |
3744 static void _TUFUnreg(int32_t n) { | |
3745 if (_TUFF[n] != NULL) { | |
3746 Transliterator::unregister(*_TUFID[n]); | |
3747 delete _TUFF[n]; | |
3748 delete _TUFID[n]; | |
3749 } | |
3750 } | |
3751 | |
3752 // END TestUserFunction support factory | |
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
3754 | |
3755 /** | |
3756 * Test that user-registered transliterators can be used under function | |
3757 * syntax. | |
3758 */ | |
void TransliteratorTest::TestUserFunction() {
    // Registers four rule-based transliterators through the _TUF* factory
    // helpers and then uses them via "&Name(...)" function syntax inside
    // other rules. Everything registered is torn down at the FAIL label,
    // which is reached on both the success and the failure paths.

    Transliterator* t;
    UParseError pe;
    UErrorCode ec = U_ZERO_ERROR;

    // Setup our factory
    // (_TUFUnreg() skips slots whose _TUFF entry is NULL)
    int32_t i;
    for (i=0; i<4; ++i) {
        _TUFF[i] = NULL;
    }

    // There's no need to register inverses if we don't use them
    t = Transliterator::createFromRules("gif",
                                        UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        // Nothing has been registered yet, so a plain return is enough here.
        dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
        return;
    }
    // _TUFReg() stores t in _TUFF[0]; it is deleted later by _TUFUnreg(0).
    _TUFReg("Any-gif", t, 0);

    t = Transliterator::createFromRules("RemoveCurly",
                                        UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
        goto FAIL;
    }
    expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
    _TUFReg("Any-RemoveCurly", t, 1);

    logln("Trying &hex");
    t = Transliterator::createFromRules("hex2",
                                        "(.) > &hex($1);",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln("FAIL: createFromRules");
        goto FAIL;
    }
    logln("Registering");
    _TUFReg("Any-hex2", t, 2);
    // t now refers to a separate instance obtained through the registry;
    // it is deleted locally below, unlike the prototype stored in _TUFF[2].
    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
        goto FAIL;
    }
    expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
    delete t;

    logln("Trying &gif");
    t = Transliterator::createFromRules("gif2",
                                        "(.) > &Gif(&Hex2($1));",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
        goto FAIL;
    }
    logln("Registering");
    _TUFReg("Any-gif2", t, 3);
    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
        goto FAIL;
    }
    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
    delete t;

    // Test that filters are allowed after &
    t = Transliterator::createFromRules("test",
                                        "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
                                        UTRANS_FORWARD, pe, ec);
    if (t == NULL || U_FAILURE(ec)) {
        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
        goto FAIL;
    }
    expect(*t, "abc",
           UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
    delete t;

 FAIL:
    // Common cleanup: unregister and free whatever slots were filled.
    for (i=0; i<4; ++i) {
        _TUFUnreg(i);
    }
}
3845 | |
3846 /** | |
3847 * Test the Any-X transliterators. | |
3848 */ | |
3849 void TransliteratorTest::TestAnyX(void) { | |
3850 UParseError parseError; | |
3851 UErrorCode status = U_ZERO_ERROR; | |
3852 Transliterator* anyLatin = | |
3853 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError,
status); | |
3854 if (anyLatin==0) { | |
3855 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status)
); | |
3856 delete anyLatin; | |
3857 return; | |
3858 } | |
3859 | |
3860 expect(*anyLatin, | |
3861 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039
A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"), | |
3862 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc")); | |
3863 | |
3864 delete anyLatin; | |
3865 } | |
3866 | |
3867 /** | |
3868 * Test Any-X transliterators with sample letters from all scripts. | |
3869 */ | |
3870 void TransliteratorTest::TestAny(void) { | |
3871 UErrorCode status = U_ZERO_ERROR; | |
3872 // Note: there is a lot of implict construction of UnicodeStrings from (char
*) in | |
3873 // function call parameters going on in this test. | |
3874 UnicodeSet alphabetic("[:alphabetic:]", status); | |
3875 if (U_FAILURE(status)) { | |
3876 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__,
u_errorName(status)); | |
3877 return; | |
3878 } | |
3879 alphabetic.freeze(); | |
3880 | |
3881 UnicodeString testString; | |
3882 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) { | |
3883 const char *scriptName = uscript_getShortName((UScriptCode)i); | |
3884 if (scriptName == NULL) { | |
3885 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FI
LE__, __LINE__, i); | |
3886 return; | |
3887 } | |
3888 | |
3889 UnicodeSet sample; | |
3890 sample.applyPropertyAlias("script", scriptName, status); | |
3891 if (U_FAILURE(status)) { | |
3892 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__,
u_errorName(status)); | |
3893 return; | |
3894 } | |
3895 sample.retainAll(alphabetic); | |
3896 for (int32_t count=0; count<5; count++) { | |
3897 UChar32 c = sample.charAt(count); | |
3898 if (c == -1) { | |
3899 break; | |
3900 } | |
3901 testString.append(c); | |
3902 } | |
3903 } | |
3904 | |
3905 UParseError parseError; | |
3906 Transliterator* anyLatin = | |
3907 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError,
status); | |
3908 if (U_FAILURE(status)) { | |
3909 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__,
u_errorName(status)); | |
3910 return; | |
3911 } | |
3912 | |
3913 logln(UnicodeString("Sample set for Any-Latin: ") + testString); | |
3914 anyLatin->transliterate(testString); | |
3915 logln(UnicodeString("Sample result for Any-Latin: ") + testString); | |
3916 delete anyLatin; | |
3917 } | |
3918 | |
3919 | |
3920 /** | |
3921 * Test the source and target set API. These are only implemented | |
3922 * for RBT and CompoundTransliterator at this time. | |
3923 */ | |
3924 void TransliteratorTest::TestSourceTargetSet() { | |
3925 UErrorCode ec = U_ZERO_ERROR; | |
3926 | |
3927 // Rules | |
3928 const char* r = | |
3929 "a > b; " | |
3930 "r [x{lu}] > q;"; | |
3931 | |
3932 // Expected source | |
3933 UnicodeSet expSrc("[arx{lu}]", ec); | |
3934 | |
3935 // Expected target | |
3936 UnicodeSet expTrg("[bq]", ec); | |
3937 | |
3938 UParseError pe; | |
3939 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWAR
D, pe, ec); | |
3940 | |
3941 if (U_FAILURE(ec)) { | |
3942 delete t; | |
3943 errln("FAIL: Couldn't set up test"); | |
3944 return; | |
3945 } | |
3946 | |
3947 UnicodeSet src; t->getSourceSet(src); | |
3948 UnicodeSet trg; t->getTargetSet(trg); | |
3949 | |
3950 if (src == expSrc && trg == expTrg) { | |
3951 UnicodeString a, b; | |
3952 logln((UnicodeString)"Ok: " + | |
3953 r + " => source = " + src.toPattern(a, TRUE) + | |
3954 ", target = " + trg.toPattern(b, TRUE)); | |
3955 } else { | |
3956 UnicodeString a, b, c, d; | |
3957 errln((UnicodeString)"FAIL: " + | |
3958 r + " => source = " + src.toPattern(a, TRUE) + | |
3959 ", expected " + expSrc.toPattern(b, TRUE) + | |
3960 "; target = " + trg.toPattern(c, TRUE) + | |
3961 ", expected " + expTrg.toPattern(d, TRUE)); | |
3962 } | |
3963 | |
3964 delete t; | |
3965 } | |
3966 | |
3967 /** | |
3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. | |
3969 */ | |
3970 void TransliteratorTest::TestPatternWhiteSpace() { | |
3971 // Rules | |
3972 const char* r = "a > \\u200E b;"; | |
3973 | |
3974 UErrorCode ec = U_ZERO_ERROR; | |
3975 UParseError pe; | |
3976 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeSt
ring(r), UTRANS_FORWARD, pe, ec); | |
3977 | |
3978 if (U_FAILURE(ec)) { | |
3979 errln("FAIL: Couldn't set up test"); | |
3980 } else { | |
3981 expect(*t, "a", "b"); | |
3982 } | |
3983 delete t; | |
3984 | |
3985 // UnicodeSet | |
3986 ec = U_ZERO_ERROR; | |
3987 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec); | |
3988 | |
3989 if (U_FAILURE(ec)) { | |
3990 errln("FAIL: Couldn't set up test"); | |
3991 } else { | |
3992 if (set.contains(0x200E)) { | |
3993 errln("FAIL: U+200E not being ignored by UnicodeSet"); | |
3994 } | |
3995 } | |
3996 } | |
3997 //====================================================================== | |
3998 // this method is in TestUScript.java | |
3999 //====================================================================== | |
4000 void TransliteratorTest::TestAllCodepoints(){ | |
4001 UScriptCode code= USCRIPT_INVALID_CODE; | |
4002 char id[256]={'\0'}; | |
4003 char abbr[256]={'\0'}; | |
4004 char newId[256]={'\0'}; | |
4005 char newAbbrId[256]={'\0'}; | |
4006 char oldId[256]={'\0'}; | |
4007 char oldAbbrId[256]={'\0'}; | |
4008 | |
4009 UErrorCode status =U_ZERO_ERROR; | |
4010 UParseError pe; | |
4011 | |
4012 for(uint32_t i = 0; i<=0x10ffff; i++){ | |
4013 code = uscript_getScript(i,&status); | |
4014 if(code == USCRIPT_INVALID_CODE){ | |
4015 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i); | |
4016 } | |
4017 const char* myId = uscript_getName(code); | |
4018 if(!myId) { | |
4019 dataerrln("Valid script code returned NULL name. Check your data!"); | |
4020 return; | |
4021 } | |
4022 uprv_strcpy(id,myId); | |
4023 uprv_strcpy(abbr,uscript_getShortName(code)); | |
4024 | |
4025 uprv_strcpy(newId,"[:"); | |
4026 uprv_strcat(newId,id); | |
4027 uprv_strcat(newId,":];NFD"); | |
4028 | |
4029 uprv_strcpy(newAbbrId,"[:"); | |
4030 uprv_strcat(newAbbrId,abbr); | |
4031 uprv_strcat(newAbbrId,":];NFD"); | |
4032 | |
4033 if(uprv_strcmp(newId,oldId)!=0){ | |
4034 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORW
ARD,pe,status); | |
4035 if(t==NULL || U_FAILURE(status)){ | |
4036 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - "
+ u_errorName(status)); | |
4037 } | |
4038 delete t; | |
4039 } | |
4040 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){ | |
4041 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_
FORWARD,pe,status); | |
4042 if(t==NULL || U_FAILURE(status)){ | |
4043 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - "
+ u_errorName(status)); | |
4044 } | |
4045 delete t; | |
4046 } | |
4047 uprv_strcpy(oldId,newId); | |
4048 uprv_strcpy(oldAbbrId, newAbbrId); | |
4049 | |
4050 } | |
4051 | |
4052 } | |
4053 | |
// Creates the forward transliterator for `id` and reports an error unless
// its getDynamicClassID() equals cls::getStaticClassID(). A creation failure
// is reported through dataerrln(). `id` is also passed to a "%s" format, so
// it must be a C string.
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4067 | |
// Builds a transliterator named "_" from `rule` and reports an error unless
// its getDynamicClassID() equals cls::getStaticClassID(). `rule` is pasted
// next to a string literal in the failure message, so it must itself be a
// string literal.
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082 | |
// For each listed ID (and one rule string), creates the transliterator and
// checks that its dynamic class ID matches the static class ID of the class
// named alongside it; see TEST_TRANSLIT_ID and TEST_TRANSLIT_RULE.
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4098 | |
4099 void TransliteratorTest::TestAlternateSyntax() { | |
4100 // U+2206 == & | |
4101 // U+2190 == < | |
4102 // U+2192 == > | |
4103 // U+2194 == <> | |
4104 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"), | |
4105 "abc", | |
4106 "xbz"); | |
4107 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), | |
4108 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), | |
4109 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW
}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}")); | |
4110 } | |
4111 | |
// Rule sets referenced by index from BEGIN_END_TEST_CASES. Entries whose
// ::BEGIN/::END form is commented out are kept as empty strings so that the
// indexes of the remaining entries do not shift.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4285 | |
4286 /* | |
4287 (This entire test is commented out below and will need some heavy revision when
we re-add | |
4288 the ::BEGIN/::END stuff) | |
4289 static const char* BOGUS_BEGIN_END_RULES[] = { | |
4290 // [7] | |
4291 "::BEGIN;" | |
4292 "abc > xy;" | |
4293 "::BEGIN;" | |
4294 "aba > z;" | |
4295 "::END;" | |
4296 "::END;", | |
4297 | |
4298 // [8] | |
4299 "abc > xy;" | |
4300 " aba > z;" | |
4301 "::END;", | |
4302 | |
4303 // [9] | |
4304 "::BEGIN;" | |
4305 "::Upper;" | |
4306 "::END;" | |
4307 }; | |
4308 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN
_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0])); | |
4309 */ | |
4310 | |
4311 static const char* BEGIN_END_TEST_CASES[] = { | |
4312 // rules input expected output | |
4313 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", | |
4314 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", | |
4315 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", | |
4316 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", | |
4317 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", | |
4318 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", | |
4319 | |
4320 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", | |
4321 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", | |
4322 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", | |
4323 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", | |
4324 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", | |
4325 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", | |
4326 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", | |
4327 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", | |
4328 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", | |
4329 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", | |
4330 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", | |
4331 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", | |
4332 | |
4333 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", | |
4334 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", | |
4335 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", | |
4336 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" | |
4337 }; | |
4338 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TE
ST_CASES) / sizeof(BEGIN_END_TEST_CASES[0])); | |
4339 | |
4340 void TransliteratorTest::TestBeginEnd() { | |
4341 // run through the list of test cases above | |
4342 int32_t i = 0; | |
4343 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { | |
4344 expect((UnicodeString)"Test case #" + (i / 3), | |
4345 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), | |
4346 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), | |
4347 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); | |
4348 } | |
4349 | |
4350 // instantiate the one reversible rule set in the reverse direction and make
sure it does the right thing | |
4351 UParseError parseError; | |
4352 UErrorCode status = U_ZERO_ERROR; | |
4353 Transliterator* reversed = Transliterator::createFromRules("Reversed", Unic
odeString(BEGIN_END_RULES[17]), | |
4354 UTRANS_REVERSE, parseError, status); | |
4355 if (reversed == 0 || U_FAILURE(status)) { | |
4356 reportParseError(UnicodeString("FAIL: Couldn't create reversed translite
rator"), parseError, status); | |
4357 } else { | |
4358 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy ab
c xaba yz aba")); | |
4359 } | |
4360 delete reversed; | |
4361 | |
4362 // finally, run through the list of syntactically-ill-formed rule sets above
and make sure | |
4363 // that all of them cause errors | |
4364 /* | |
4365 (commented out until we have the real ::BEGIN/::END stuff in place | |
4366 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) { | |
4367 UParseError parseError; | |
4368 UErrorCode status = U_ZERO_ERROR; | |
4369 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString
(BOGUS_BEGIN_END_RULES[i]), | |
4370 UTRANS_FORWARD, parseError, status); | |
4371 if (!U_FAILURE(status)) { | |
4372 delete t; | |
4373 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS
_BEGIN_END_RULES[i]); | |
4374 } | |
4375 } | |
4376 */ | |
4377 } | |
4378 | |
4379 void TransliteratorTest::TestBeginEndToRules() { | |
4380 // run through the same list of test cases we used above, but this time, ins
tead of just | |
4381 // instantiating a Transliterator from the rules and running the test agains
t it, we instantiate | |
4382 // a Transliterator from the rules, do toRules() on it, instantiate a Transl
iterator from | |
4383 // the resulting set of rules, and make sure that the generated rule set is
semantically equivalent | |
4384 // to (i.e., does the same thing as) the original rule set | |
4385 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { | |
4386 UParseError parseError; | |
4387 UErrorCode status = U_ZERO_ERROR; | |
4388 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(
BEGIN_END_TEST_CASES[i], -1, US_INV), | |
4389 UTRANS_FORWARD, parseError, status); | |
4390 if (U_FAILURE(status)) { | |
4391 reportParseError(UnicodeString("FAIL: Couldn't create transliterator
"), parseError, status); | |
4392 } else { | |
4393 UnicodeString rules; | |
4394 t->toRules(rules, TRUE); | |
4395 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)
"Test case #" + (i / 3), rules, | |
4396 UTRANS_FORWARD, parseError, status); | |
4397 if (U_FAILURE(status)) { | |
4398 reportParseError(UnicodeString("FAIL: Couldn't create transliter
ator from generated rules"), | |
4399 parseError, status); | |
4400 delete t; | |
4401 } else { | |
4402 expect(*t2, | |
4403 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), | |
4404 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); | |
4405 delete t; | |
4406 delete t2; | |
4407 } | |
4408 } | |
4409 } | |
4410 | |
4411 // do the same thing for the reversible test case | |
4412 UParseError parseError; | |
4413 UErrorCode status = U_ZERO_ERROR; | |
4414 Transliterator* reversed = Transliterator::createFromRules("Reversed", Unico
deString(BEGIN_END_RULES[17]), | |
4415 UTRANS_REVERSE, parseError, status); | |
4416 if (U_FAILURE(status)) { | |
4417 reportParseError(UnicodeString("FAIL: Couldn't create reversed translite
rator"), parseError, status); | |
4418 } else { | |
4419 UnicodeString rules; | |
4420 reversed->toRules(rules, FALSE); | |
4421 Transliterator* reversed2 = Transliterator::createFromRules("Reversed",
rules, UTRANS_FORWARD, | |
4422 parseError, status); | |
4423 if (U_FAILURE(status)) { | |
4424 reportParseError(UnicodeString("FAIL: Couldn't create reversed trans
literator from generated rules"), | |
4425 parseError, status); | |
4426 delete reversed; | |
4427 } else { | |
4428 expect(*reversed2, | |
4429 UnicodeString("xy XY XYZ yz YZ"), | |
4430 UnicodeString("xy abc xaba yz aba")); | |
4431 delete reversed; | |
4432 delete reversed2; | |
4433 } | |
4434 } | |
4435 } | |
4436 | |
4437 void TransliteratorTest::TestRegisterAlias() { | |
4438 UnicodeString longID("Lower;[aeiou]Upper"); | |
4439 UnicodeString shortID("Any-CapVowels"); | |
4440 UnicodeString reallyShortID("CapVowels"); | |
4441 | |
4442 Transliterator::registerAlias(shortID, longID); | |
4443 | |
4444 UErrorCode err = U_ZERO_ERROR; | |
4445 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD,
err); | |
4446 if (U_FAILURE(err)) { | |
4447 errln("Failed to instantiate transliterator with long ID"); | |
4448 Transliterator::unregister(shortID); | |
4449 return; | |
4450 } | |
4451 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FO
RWARD, err); | |
4452 if (U_FAILURE(err)) { | |
4453 errln("Failed to instantiate transliterator with short ID"); | |
4454 delete t1; | |
4455 Transliterator::unregister(shortID); | |
4456 return; | |
4457 } | |
4458 | |
4459 if (t1->getID() != longID) | |
4460 errln("Transliterator instantiated with long ID doesn't have long ID"); | |
4461 if (t2->getID() != reallyShortID) | |
4462 errln("Transliterator instantiated with short ID doesn't have short ID")
; | |
4463 | |
4464 UnicodeString rules1; | |
4465 UnicodeString rules2; | |
4466 | |
4467 t1->toRules(rules1, TRUE); | |
4468 t2->toRules(rules2, TRUE); | |
4469 if (rules1 != rules2) | |
4470 errln("Alias transliterators aren't the same"); | |
4471 | |
4472 delete t1; | |
4473 delete t2; | |
4474 Transliterator::unregister(shortID); | |
4475 | |
4476 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err); | |
4477 if (U_SUCCESS(err)) { | |
4478 errln("Instantiation with short ID succeeded after short ID was unregist
ered"); | |
4479 delete t1; | |
4480 } | |
4481 | |
4482 // try the same thing again, but this time with something other than | |
4483 // an instance of CompoundTransliterator | |
4484 UnicodeString realID("Latin-Greek"); | |
4485 UnicodeString fakeID("Latin-dlgkjdflkjdl"); | |
4486 Transliterator::registerAlias(fakeID, realID); | |
4487 | |
4488 err = U_ZERO_ERROR; | |
4489 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err); | |
4490 if (U_FAILURE(err)) { | |
4491 dataerrln("Failed to instantiate transliterator with real ID - %s", u_er
rorName(err)); | |
4492 Transliterator::unregister(realID); | |
4493 return; | |
4494 } | |
4495 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err); | |
4496 if (U_FAILURE(err)) { | |
4497 errln("Failed to instantiate transliterator with fake ID"); | |
4498 delete t1; | |
4499 Transliterator::unregister(realID); | |
4500 return; | |
4501 } | |
4502 | |
4503 t1->toRules(rules1, TRUE); | |
4504 t2->toRules(rules2, TRUE); | |
4505 if (rules1 != rules2) | |
4506 errln("Alias transliterators aren't the same"); | |
4507 | |
4508 delete t1; | |
4509 delete t2; | |
4510 Transliterator::unregister(fakeID); | |
4511 } | |
4512 | |
4513 void TransliteratorTest::TestRuleStripping() { | |
4514 /* | |
4515 # | |
4516 \uE001>\u0C01; # SIGN | |
4517 */ | |
4518 static const UChar rule[] = { | |
4519 0x0023,0x0020,0x000D,0x000A, | |
4520 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x
004E,0 | |
4521 }; | |
4522 static const UChar expectedRule[] = { | |
4523 0xE001,0x003E,0x0C01,0x003B,0 | |
4524 }; | |
4525 UChar result[sizeof(rule)/sizeof(rule[0])]; | |
4526 UErrorCode status = U_ZERO_ERROR; | |
4527 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])
), result, &status); | |
4528 if (len != u_strlen(expectedRule)) { | |
4529 errln("utrans_stripRules return len = %d", len); | |
4530 } | |
4531 if (u_strncmp(expectedRule, result, len) != 0) { | |
4532 errln("utrans_stripRules did not return expected string"); | |
4533 } | |
4534 } | |
4535 | |
4536 /** | |
4537 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). | |
4538 */ | |
4539 void TransliteratorTest::TestHalfwidthFullwidth(void) { | |
4540 UParseError parseError; | |
4541 UErrorCode status = U_ZERO_ERROR; | |
4542 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", U
TRANS_FORWARD, parseError, status); | |
4543 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", U
TRANS_FORWARD, parseError, status); | |
4544 if (hf == 0 || fh == 0) { | |
4545 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); | |
4546 delete hf; | |
4547 delete fh; | |
4548 return; | |
4549 } | |
4550 | |
4551 // Array of 2n items | |
4552 // Each item is | |
4553 // "hf"|"fh"|"both", | |
4554 // <Halfwidth>, | |
4555 // <Fullwidth> | |
4556 const char* DATA[] = { | |
4557 "both", | |
4558 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020", | |
4559 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000", | |
4560 }; | |
4561 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); | |
4562 | |
4563 for (int32_t i=0; i<DATA_length; i+=3) { | |
4564 UnicodeString h = CharsToUnicodeString(DATA[i+1]); | |
4565 UnicodeString f = CharsToUnicodeString(DATA[i+2]); | |
4566 switch (*DATA[i]) { | |
4567 case 0x68: //'h': // Halfwidth-Fullwidth only | |
4568 expect(*hf, h, f); | |
4569 break; | |
4570 case 0x66: //'f': // Fullwidth-Halfwidth only | |
4571 expect(*fh, f, h); | |
4572 break; | |
4573 case 0x62: //'b': // both directions | |
4574 expect(*hf, h, f); | |
4575 expect(*fh, f, h); | |
4576 break; | |
4577 } | |
4578 } | |
4579 delete hf; | |
4580 delete fh; | |
4581 } | |
4582 | |
4583 | |
4584 /** | |
4585 * Test Thai. The text is the first paragraph of "What is Unicode" from th
e Unicode.org web site. | |
4586 * TODO: confirm that the expected results are correct. | |
4587 * For now, test just confirms that C++ and Java give identical
results. | |
4588 */ | |
4589 void TransliteratorTest::TestThai(void) { | |
4590 #if !UCONFIG_NO_BREAK_ITERATION | |
4591 UParseError parseError; | |
4592 UErrorCode status = U_ZERO_ERROR; | |
4593 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORW
ARD, parseError, status); | |
4594 if (tr == 0) { | |
4595 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); | |
4596 return; | |
4597 } | |
4598 if (U_FAILURE(status)) { | |
4599 errln("FAIL: createInstance failed with %s", u_errorName(status)); | |
4600 return; | |
4601 } | |
4602 const char *thaiText = | |
4603 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\
\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d" | |
4604 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\
\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22" | |
4605 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\
\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d" | |
4606 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e2
1\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d" | |
4607 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\
\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29" | |
4608 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\
\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42" | |
4609 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\
\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25" | |
4610 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\
\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15" | |
4611 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e3
2\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08" | |
4612 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\
\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49" | |
4613 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e
39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23" | |
4614 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\
\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23" | |
4615 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\
\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48" | |
4616 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encod
ing \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48" | |
4617 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\
\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30" | |
4618 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d:
\\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d" | |
4619 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e0
9\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01" | |
4620 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\
\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e" | |
4621 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\
\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49" | |
4622 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encodin
g \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04" | |
4623 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\
\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19" | |
4624 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e4
1\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43" | |
4625 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\
\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20" | |
4626 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01
\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35" | |
4627 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e
22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b" | |
4628 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\
\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04" | |
4629 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\
\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19" | |
4630 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29
\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40" | |
4631 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\
\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22" | |
4632 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b."; | |
4633 | |
4634 const char *latinText = | |
4635 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w,
khxmphiwtexr\\u0312 ca ke\\u012b\\u0300" | |
4636 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng
t\\u1ea1wlek\\u0304h. khxmphiwtexr" | |
4637 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e
6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304" | |
4638 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\
u0304h h\\u0304\\u0131\\u0302 s\\u0304" | |
4639 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u03
04n\\u0302\\u0101 th\\u012b\\u0300\\u0301" | |
4640 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee
5\\u0302n, d\\u1ecb\\u0302 m\\u012b " | |
4641 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u03
04\\u1ea3h\\u0304r\\u1ea1b k\\u0101" | |
4642 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0
101 n\\u012b\\u0302. m\\u1ecb\\u0300m" | |
4643 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea
1w x\\u1ea1kk\\u0304hra m\\u0101k p" | |
4644 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304hep
h\\u0101a n\\u0131 kl\\u00f9m s\\u0304" | |
4645 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng
de\\u012byw k\\u0306 t\\u0302xngk\\u0101" | |
4646 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u032
3h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131" | |
4647 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\
\u0131 p\\u0323h\\u0101s\\u0304\\u02b9" | |
4648 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\
u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306" | |
4649 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012by
ng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1" | |
4650 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xn
gh\\u0304m\\u0101y wrrkh txn l\\u00e6" | |
4651 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u010
1ng thekhnikh th\\u012b\\u0300 ch\\u0131" | |
4652 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb."; | |
4653 | |
4654 | |
4655 UnicodeString xlitText(thaiText); | |
4656 xlitText = xlitText.unescape(); | |
4657 tr->transliterate(xlitText); | |
4658 | |
4659 UnicodeString expectedText(latinText); | |
4660 expectedText = expectedText.unescape(); | |
4661 expect(*tr, xlitText, expectedText); | |
4662 | |
4663 delete tr; | |
4664 #endif | |
4665 } | |
4666 | |
4667 | |
4668 //====================================================================== | |
4669 // Support methods | |
4670 //====================================================================== | |
4671 void TransliteratorTest::expectT(const UnicodeString& id, | |
4672 const UnicodeString& source, | |
4673 const UnicodeString& expectedResult) { | |
4674 UErrorCode ec = U_ZERO_ERROR; | |
4675 UParseError pe; | |
4676 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, e
c); | |
4677 if (U_FAILURE(ec)) { | |
4678 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorNa
me(ec)); | |
4679 delete t; | |
4680 return; | |
4681 } | |
4682 expect(*t, source, expectedResult); | |
4683 delete t; | |
4684 } | |
4685 | |
4686 void TransliteratorTest::reportParseError(const UnicodeString& message, | |
4687 const UParseError& parseError, | |
4688 const UErrorCode& status) { | |
4689 dataerrln(message + | |
4690 /*", parse error " + parseError.code +*/ | |
4691 ", line " + parseError.line + | |
4692 ", offset " + parseError.offset + | |
4693 ", pre-context " + prettify(parseError.preContext, TRUE) + | |
4694 ", post-context " + prettify(parseError.postContext,TRUE) + | |
4695 ", Error: " + u_errorName(status)); | |
4696 } | |
4697 | |
4698 void TransliteratorTest::expect(const UnicodeString& rules, | |
4699 const UnicodeString& source, | |
4700 const UnicodeString& expectedResult, | |
4701 UTransPosition *pos) { | |
4702 expect("<ID>", rules, source, expectedResult, pos); | |
4703 } | |
4704 | |
4705 void TransliteratorTest::expect(const UnicodeString& id, | |
4706 const UnicodeString& rules, | |
4707 const UnicodeString& source, | |
4708 const UnicodeString& expectedResult, | |
4709 UTransPosition *pos) { | |
4710 UErrorCode status = U_ZERO_ERROR; | |
4711 UParseError parseError; | |
4712 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWAR
D, parseError, status); | |
4713 if (U_FAILURE(status)) { | |
4714 reportParseError(UnicodeString("Couldn't create transliterator from ") +
rules, parseError, status); | |
4715 } else { | |
4716 expect(*t, source, expectedResult, pos); | |
4717 } | |
4718 delete t; | |
4719 } | |
4720 | |
4721 void TransliteratorTest::expect(const Transliterator& t, | |
4722 const UnicodeString& source, | |
4723 const UnicodeString& expectedResult, | |
4724 const Transliterator& reverseTransliterator) { | |
4725 expect(t, source, expectedResult); | |
4726 expect(reverseTransliterator, expectedResult, source); | |
4727 } | |
4728 | |
4729 void TransliteratorTest::expect(const Transliterator& t, | |
4730 const UnicodeString& source, | |
4731 const UnicodeString& expectedResult, | |
4732 UTransPosition *pos) { | |
4733 if (pos == 0) { | |
4734 UnicodeString result(source); | |
4735 t.transliterate(result); | |
4736 expectAux(t.getID() + ":String", source, result, expectedResult); | |
4737 } | |
4738 UTransPosition index={0, 0, 0, 0}; | |
4739 if (pos != 0) { | |
4740 index = *pos; | |
4741 } | |
4742 | |
4743 UnicodeString rsource(source); | |
4744 if (pos == 0) { | |
4745 t.transliterate(rsource); | |
4746 } else { | |
4747 // Do it all at once -- below we do it incrementally | |
4748 t.finishTransliteration(rsource, *pos); | |
4749 } | |
4750 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult); | |
4751 | |
4752 // Test keyboard (incremental) transliteration -- this result | |
4753 // must be the same after we finalize (see below). | |
4754 UnicodeString log; | |
4755 rsource.remove(); | |
4756 if (pos != 0) { | |
4757 rsource = source; | |
4758 formatInput(log, rsource, index); | |
4759 log.append(" -> "); | |
4760 UErrorCode status = U_ZERO_ERROR; | |
4761 t.transliterate(rsource, index, status); | |
4762 formatInput(log, rsource, index); | |
4763 } else { | |
4764 for (int32_t i=0; i<source.length(); ++i) { | |
4765 if (i != 0) { | |
4766 log.append(" + "); | |
4767 } | |
4768 log.append(source.charAt(i)).append(" -> "); | |
4769 UErrorCode status = U_ZERO_ERROR; | |
4770 t.transliterate(rsource, index, source.charAt(i), status); | |
4771 formatInput(log, rsource, index); | |
4772 } | |
4773 } | |
4774 | |
4775 // As a final step in keyboard transliteration, we must call | |
4776 // transliterate to finish off any pending partial matches that | |
4777 // were waiting for more input. | |
4778 t.finishTransliteration(rsource, index); | |
4779 log.append(" => ").append(rsource); | |
4780 | |
4781 expectAux(t.getID() + ":Keyboard", log, | |
4782 rsource == expectedResult, | |
4783 expectedResult); | |
4784 } | |
4785 | |
4786 | |
4787 /** | |
4788 * @param appendTo result is appended to this param. | |
4789 * @param input the string being transliterated | |
4790 * @param pos the index struct | |
4791 */ | |
4792 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo, | |
4793 const UnicodeString& input, | |
4794 const UTransPosition& pos) { | |
4795 // Output a string of the form aaa{bbb|ccc|ddd}eee, where | |
4796 // the {} indicate the context start and limit, and the || | |
4797 // indicate the start and limit. | |
4798 if (0 <= pos.contextStart && | |
4799 pos.contextStart <= pos.start && | |
4800 pos.start <= pos.limit && | |
4801 pos.limit <= pos.contextLimit && | |
4802 pos.contextLimit <= input.length()) { | |
4803 | |
4804 UnicodeString a, b, c, d, e; | |
4805 input.extractBetween(0, pos.contextStart, a); | |
4806 input.extractBetween(pos.contextStart, pos.start, b); | |
4807 input.extractBetween(pos.start, pos.limit, c); | |
4808 input.extractBetween(pos.limit, pos.contextLimit, d); | |
4809 input.extractBetween(pos.contextLimit, input.length(), e); | |
4810 appendTo.append(a).append((UChar)123/*{*/).append(b). | |
4811 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d). | |
4812 append((UChar)125/*}*/).append(e); | |
4813 } else { | |
4814 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" + | |
4815 pos.contextStart + ", s=" + pos.start + ", l=" + | |
4816 pos.limit + ", cl=" + pos.contextLimit + "} on " + | |
4817 input); | |
4818 } | |
4819 return appendTo; | |
4820 } | |
4821 | |
4822 void TransliteratorTest::expectAux(const UnicodeString& tag, | |
4823 const UnicodeString& source, | |
4824 const UnicodeString& result, | |
4825 const UnicodeString& expectedResult) { | |
4826 expectAux(tag, source + " -> " + result, | |
4827 result == expectedResult, | |
4828 expectedResult); | |
4829 } | |
4830 | |
4831 void TransliteratorTest::expectAux(const UnicodeString& tag, | |
4832 const UnicodeString& summary, UBool pass, | |
4833 const UnicodeString& expectedResult) { | |
4834 if (pass) { | |
4835 logln(UnicodeString("(")+tag+") " + prettify(summary)); | |
4836 } else { | |
4837 dataerrln(UnicodeString("FAIL: (")+tag+") " | |
4838 + prettify(summary) | |
4839 + ", expected " + prettify(expectedResult)); | |
4840 } | |
4841 } | |
4842 | |
4843 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
OLD | NEW |